0.8.38+ (kg load optimizations)

2025-12-03 11:45:01 -05:00 · 2012-02-06 04:03:24 -07:00 · 2012-02-06 04:03:24 -07:00 · dbd588f94e
commit dbd588f94e
parent 68e0e3a81d 7396d7a69d
111 changed files with 7682 additions and 6921 deletions
--- a/recipes/foreignaffairs.recipe
+++ b/recipes/foreignaffairs.recipe
@ -3,10 +3,17 @@ import re
 from calibre.ptempfile import PersistentTemporaryFile
 class ForeignAffairsRecipe(BasicNewsRecipe):
    ''' there are three modifications:
    1) fetch issue cover
    2) toggle ignore premium articles
    3) extract proper section names, ie. "Comments", "Essay"
    by Chen Wei weichen302@gmx.com, 2012-02-05'''
    __license__  = 'GPL v3'
    __author__ = 'kwetal'
    language = 'en'
-    version = 1
+    version = 1.01
    title = u'Foreign Affairs (Subcription or (free) Registration)'
    publisher = u'Council on Foreign Relations'
@ -17,6 +24,9 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
    remove_javascript = True
    INDEX = 'http://www.foreignaffairs.com'
    FRONTPAGE = 'http://www.foreignaffairs.com/magazine'
    INCLUDE_PREMIUM = False
    remove_tags = []
    remove_tags.append(dict(name = 'base'))
@ -37,6 +47,12 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
    temp_files = []
    articles_are_obfuscated = True
    def get_cover_url(self):
        '''
        Return the URL of the current issue's cover image, or None.

        The image is looked up on the FRONTPAGE inside the div with class
        'inthemag-issuebuy-cover'. None is returned when that div, or an
        <img> with a src attribute inside it, cannot be found, instead of
        failing on the missing tag.
        '''
        soup = self.index_to_soup(self.FRONTPAGE)
        cover_div = soup.find('div', attrs={'class':'inthemag-issuebuy-cover'})
        if cover_div is None:
            return None
        img = cover_div.find('img', src=True)
        if img is None:
            return None
        # src is appended to the site root as-is
        # (assumes the page uses site-relative image paths -- TODO confirm)
        return self.INDEX + img['src']
    def get_obfuscated_article(self, url):
        br = self.get_browser()
        br.open(url)
@ -50,57 +66,47 @@ class ForeignAffairsRecipe(BasicNewsRecipe):
        return self.temp_files[-1].name
    def parse_index(self):
        soup = self.index_to_soup('http://www.foreignaffairs.com/magazine')
        articles = []
        answer = []
-        content = soup.find('div', attrs = {'class': 'center-wrapper'})
+        soup = self.index_to_soup(self.FRONTPAGE)
-        if content:
+        sec_start = soup.findAll('div', attrs={'class':'panel-separator'})
-            for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
+        for sec in sec_start:
-                tag = div.find('div', attrs = {'class': 'views-field-title'})
+            content = sec.nextSibling
-                if tag:
+            if content:
-                    a = tag.find('a')
+                section = self.tag_to_string(content.find('h2'))
                    if a:
                        title = self.tag_to_string(a)
                        url = self.INDEX + a['href']
                        author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
                        tag = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
                        # If they ever fix their markup, this will break :-(
                        summary = self.tag_to_string(tag.findNextSibling('p'))
                        description = author  + '<br/>' + summary
                        articles.append({'title': title, 'date': None, 'url': url, 'description': description})
                    else:
                        continue
                else:
                    continue
            answer.append(('Magazine', articles))
            ul = content.find('ul')
            if ul:
                articles = []
                for li in ul.findAll('li'):
                    tag = li.find('div', attrs = {'class': 'views-field-title'})
                    if tag:
                        a = tag.find('a')
                        if a:
                            title = self.tag_to_string(a)
                            url = self.INDEX + a['href']
                            description = ''
                            tag = li.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'})
                            if tag:
                                description = self.tag_to_string(tag)
-                            articles.append({'title': title, 'date': None, 'url': url, 'description': description})
+                tags = []
-                        else:
+                for div in content.findAll('div', attrs = {'class': re.compile(r'view-row\s+views-row-[0-9]+\s+views-row-[odd|even].*')}):
-                            continue
+                    tags.append(div)
                ul = content.find('ul')
                for li in content.findAll('li'):
                    tags.append(li)
                for div in tags:
                    title = url = description = author = None
                    if self.INCLUDE_PREMIUM:
                        found_premium = False
                    else:
-                        continue
+                        found_premium = div.findAll('span', attrs={'class':
-
+                                                               'premium-icon'})
-                answer.append(('Letters to the Editor', articles))
+                    if not found_premium:
                        tag = div.find('div', attrs={'class': 'views-field-title'})
                        if tag:
                            a = tag.find('a')
                            if a:
                                title = self.tag_to_string(a)
                                url = self.INDEX + a['href']
                            author = self.tag_to_string(div.find('div', attrs = {'class': 'views-field-field-article-display-authors-value'}))
                            tag_summary = div.find('span', attrs = {'class': 'views-field-field-article-summary-value'})
                            description = self.tag_to_string(tag_summary)
                            articles.append({'title':title, 'date':None, 'url':url,
                                     'description':description, 'author':author})
                if articles:
                    answer.append((section, articles))
        return answer
    def preprocess_html(self, soup):
--- a/recipes/ilmanifesto.recipe
+++ b/recipes/ilmanifesto.recipe
@ -0,0 +1,110 @@
 from calibre import strftime
 from calibre.web.feeds.recipes import BasicNewsRecipe
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
 MANIFESTO_BASEURL = 'http://www.ilmanifesto.it/'
 class IlManifesto(BasicNewsRecipe):
  # Recipe for Il Manifesto: the latest edition is discovered from the
  # 'in edicola' page, then every section linked from that edition is
  # listed by parse_index().
  title          = 'Il Manifesto'
  __author__ = 'Giacomo Lacava'
  description = 'quotidiano comunista - ultima edizione html disponibile'
  publication_type = 'newspaper'
  publisher = 'il manifesto coop. editrice a r.l.'
  language = 'it'
  oldest_article = 2
  max_articles_per_feed = 100
  delay = 1
  no_stylesheets = True
  simultaneous_downloads = 5
  timeout = 30
  auto_cleanup = True
  remove_tags = [dict(name='div', attrs={'class':'column_1 float_left'})]
  remove_tags_before = dict(name='div',attrs={'class':'column_2 float_right'})
  remove_tags_after = dict(id='myPrintArea')
  # Both values are filled in by _set_manifesto_index() on first use.
  manifesto_index = None
  manifesto_datestr = None
  def _set_manifesto_index(self):
    '''
    Find the latest edition and remember its index URL and date string.

    Does nothing when manifesto_index has already been set, so the
    'in edicola' page is not fetched again by later callers.
    '''
    if self.manifesto_index is not None:
      return
    startUrl = MANIFESTO_BASEURL  + 'area-abbonati/in-edicola/'
    startSoup = self.index_to_soup(startUrl)
    # The first link of the second 'accordion_inedicola' div is taken as
    # the latest edition.
    lastEdition = startSoup.findAll('div',id='accordion_inedicola')[1].find('a')['href']
    del(startSoup)
    self.manifesto_index = MANIFESTO_BASEURL + lastEdition
    # The date string is the last path component of the edition link; when
    # the link ends with '/', the component before it is used instead.
    urlsplit = lastEdition.split('/')
    self.manifesto_datestr = urlsplit[-1]
    if urlsplit[-1] == '':
      self.manifesto_datestr = urlsplit[-2]
  def get_cover_url(self):
    '''Return the URL of the front-page image of the latest edition.'''
    self._set_manifesto_index()
    url = MANIFESTO_BASEURL + 'fileadmin/archivi/in_edicola/%sprimapagina.gif' % self.manifesto_datestr
    return url
  def parse_index(self):
    '''
    Return a list of (section name, articles) tuples for the latest edition.

    Every link inside the edition's 'accordion_inedicola' div is treated as
    a section. Its page is fetched and each 'strumenti1_inedicola' div that
    contains a link becomes one article dict. A section page without a
    'column1' div is skipped.
    '''
    self._set_manifesto_index()
    soup = self.index_to_soup(self.manifesto_index)
    feedLinks =  soup.find('div',id='accordion_inedicola').findAll('a')
    result = []
    for feed in feedLinks:
      articles = []
      feedName = feed.find('h2').string
      feedUrl = MANIFESTO_BASEURL  + feed['href']
      feedSoup = self.index_to_soup(feedUrl)
      indexRoot = feedSoup.find('div',attrs={'class':'column1'})
      if indexRoot is None:
        # No article column on this section page: skip the section rather
        # than abort the whole index on a missing tag.
        continue
      for div in indexRoot.findAll('div',attrs={'class':'strumenti1_inedicola'}):
        artLink =  div.find('a')
        if artLink is None: continue # empty div
        title = artLink.string
        url = MANIFESTO_BASEURL  + artLink['href']
        description = ''
        descNode = div.find('div',attrs={'class':'text_12'})
        if descNode is not None:
          description = descNode.string
        author = ''
        authNode = div.find('div',attrs={'class':'firma'})
        if authNode is not None:
          author = authNode.string
        article = {
          'title':title,
          'url':url,
          'date': strftime('%d %B %Y'),
          'description': description,
          'content': '',
          'author': author
          }
        articles.append(article)
      result.append((feedName,articles))
    return result
  def extract_readable_article(self, html, url):
    '''
    Rebuild a minimal HTML page from the pieces of an article page.

    html is the markup of the article page; url is not used here.
    Returns an HTML string made of the title, subtitle, author, summary and
    body text found in the page.
    '''
    bs = BeautifulSoup(html)
    col1 = bs.find('div',attrs={'class':'column1'})
    content = col1.find('div',attrs={'class':'bodytext'})
    title = bs.find(id='titolo_articolo').string
    # Optional parts fall back to '' so that a missing node is not rendered
    # as the literal text "None" by the template below.
    author = col1.find('span',attrs={'class':'firma'})
    if author is None:
      author = ''
    subtitle = ''
    subNode = col1.findPrevious('div',attrs={'class':'occhiello_rosso'})
    if subNode is not None:
      subtitle = subNode
    summary = ''
    sommNode = bs.find('div',attrs={'class':'sommario'})
    if sommNode is not None:
      summary = sommNode
    template = "<html><head><title>%(title)s</title></head><body><h1>%(title)s</h1><h2>%(subtitle)s</h2><h3>%(author)s</h3><div style='font-size: x-large;'>%(summary)s</div><div>%(content)s</div></body></html>"
    del(bs)
    return template % dict(title=title,subtitle=subtitle,author=author,summary=summary,content=content)
--- a/recipes/mwjournal.recipe
+++ b/recipes/mwjournal.recipe
@ -1,58 +1,53 @@
 #!/usr/bin/env  python
 ##
-## Title:        Microwave Journal RSS recipe
+## Title:        Microwave Journal
 ## Contact:      Kiavash (use Mobile Read)
 ##
 ## License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
 ## Copyright:    Kiavash
 ##
 ## Written:      Jan 2012
-## Last Edited:  Jan 2012
+## Last Edited:  Feb 2012
 ##
 # Feb 2012: New Recipe compatible with the MWJournal 2.0 website
 __license__   = 'GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html'
 __copyright__   = 'Kiavash'
 __author__ = 'Kaivash'
 '''
-Microwave Journal Monthly Magazine
+microwavejournal.com
 You need to sign up (free) and get username/password.
 '''
-import re    # Import the regular expressions module.
+import re
 from calibre.ptempfile import TemporaryFile # we need this for saving to a temp file
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.utils.magick import Image
 class MWJournal(BasicNewsRecipe):
    # Title to use for the ebook.
    title          = u'Microwave Journal'
    __author__ = 'Kiavash'
    language = 'en'
-    #A brief description for the ebook.
+    title            = u'Microwave Journal'
-    description = u'Microwave Journal web site ebook created using rss feeds.'
+    description      = u'Microwave Journal Monthly Magazine'
-
+    publisher        = 'Horizon House'
    # Set publisher and publication type.
    publisher = 'Horizon House'
    publication_type = 'magazine'
    INDEX            = 'http://www.microwavejournal.com/publications/'
-    oldest_article = 31        # monthly published magazine. Some months are 31 days!
+    language = 'en'
    max_articles_per_feed = 100
    remove_empty_feeds = True
    auto_cleanup = True
    # Disable stylesheets and javascript from site.
    no_stylesheets = True
    remove_javascript = True
    asciiize = True    # Converts all non-ASCII characters to their ASCII equivalents
    needs_subscription = True    # oh yeah... we need to login btw.
    # Timeout for fetching files from the server, in seconds. The default of 120 seconds seems somewhat excessive.
    timeout = 30
-    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
+    Convert_Grayscale = False # Convert images to gray scale or not
    keep_only_tags = [dict(name='div', attrs={'class':'record'})]
    no_stylesheets = True
    remove_javascript = True
    remove_tags    = [
                        dict(name='font', attrs={'class':'footer'}),    # remove fonts
                     ]
    remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
                          'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
    # Specify extra CSS - overrides ALL other CSS (IE. Added last).
    extra_css = 'body { font-family: verdana, helvetica, sans-serif; } \
                 .introduction, .first { font-weight: bold; } \
                 .cross-head { font-weight: bold; font-size: 125%; } \
@ -72,72 +67,75 @@ class MWJournal(BasicNewsRecipe):
                 h3 { font-size: 125%; font-weight: bold; } \
                 h4, h5, h6 { font-size: 100%; font-weight: bold; }'
-    remove_tags    = [
+    # Remove the line breaks, href links and float left/right and picture width/height.
                        dict(name='div', attrs={'class':'boxadzonearea350'}), # Removes banner ads
                        dict(name='font', attrs={'class':'footer'}),    # remove fonts if you do like your fonts more! Comment out to use website's fonts
                        dict(name='div', attrs={'class':'newsarticlead'})
                     ]
    # Remove various tag attributes to improve the look of the ebook pages.
    remove_attributes = [ 'border', 'cellspacing', 'align', 'cellpadding', 'colspan',
                          'valign', 'vspace', 'hspace', 'alt', 'width', 'height' ]
    # Remove the line breaks as well as href links. Books don't have links generally speaking
    preprocess_regexps     = [(re.compile(r'<br[ ]*/>', re.IGNORECASE), lambda m: ''),
                              (re.compile(r'<br[ ]*clear.*/>', re.IGNORECASE), lambda m: ''),
                              (re.compile(r'<a.*?>'), lambda h1: ''),
-                              (re.compile(r'</a>'), lambda h2: '')
+                              (re.compile(r'</a>'), lambda h2: ''),
                              (re.compile(r'float:.*?'), lambda h3: ''),
                              (re.compile(r'width:.*?px'), lambda h4: ''),
                              (re.compile(r'height:.*?px'), lambda h5: '')
                              ]
    # Select the feeds that you are interested.
    feeds          = [
                        (u'Current Issue', u'http://www.mwjournal.com/rss/Rss.asp?type=99'),
                        (u'Industry News', u'http://www.mwjournal.com/rss/Rss.asp?type=1'),
                        (u'Resources', u'http://www.mwjournal.com/rss/Rss.asp?type=3'),
                        (u'Buyer\'s Guide', u'http://www.mwjournal.com/rss/Rss.asp?type=5'),
                        (u'Events', u'http://www.mwjournal.com/rss/Rss.asp?type=2'),
                        (u'All Updates', u'http://www.mwjournal.com/rss/Rss.asp?type=0'),
                    ]
    #  No magazine is complete without cover. Let's get it then!
    # The function is adapted from the Economist recipe
    def get_cover_url(self):
        '''
        Look up the cover image of the current issue on the Journal page.

        Returns the absolute image URL, or None when no matching <img> exists.
        '''
        def is_cover_src(src):
            # The cover file name changes every month, so only its fixed part
            # is matched. Of the three cover files, the one prefixed '3_' is
            # the highest resolution.
            return src and '/IssueImg/3_MWJ_CurrIss_CoverImg' in src

        journal_soup = self.index_to_soup('http://www.mwjournal.com/Journal/')
        found = journal_soup.find('img', attrs={'src': is_cover_src})
        if not found:
            return None
        return 'http://www.mwjournal.com' + found['src'].strip()
    def print_version(self, url):
-        if url.find('/Journal/article.asp?HH_ID=') >= 0:
+        return url.replace('/articles/', '/articles/print/')
            return self.browser.open_novisit(url).geturl().replace('/Journal/article.asp?HH_ID=', '/Journal/Print.asp?Id=')
        elif  url.find('/News/article.asp?HH_ID=') >= 0:
            return self.browser.open_novisit(url).geturl().replace('/News/article.asp?HH_ID=', '/Journal/Print.asp?Id=')
        elif  url.find('/Resources/TechLib.asp?HH_ID=') >= 0:
            return self.browser.open_novisit(url).geturl().replace('/Resources/TechLib.asp?HH_ID=', '/Resources/PrintRessource.asp?Id=')
-    def get_browser(self):
+    def parse_index(self):
-        '''
+        articles = []
-        Microwave Journal website, directs the login page to omeda.com once login info is submitted, omeda.com redirects to mwjournal.com with again the browser logs in into that site (hidden from the user). To overcome this obsticle, first login page is fetch and its output is stored to an HTML file. Then the HTML file is opened again and second login form is submitted (Many thanks to Barty which helped with second page login).
+
-        '''
+        soup = self.index_to_soup(self.INDEX)
-        br = BasicNewsRecipe.get_browser()
+        ts = soup.find('div', attrs={'class':'box1 article publications-show'})
-        if self.username is not None and self.password is not None:
+        ds = self.tag_to_string(ts.find('h2'))
-            url = ('http://www.omeda.com/cgi-win/mwjreg.cgi?m=login') #  main login page.
+        self.log('Found Current Issue:', ds)
-            br.open(url)    # fetch the 1st login page
+        self.timefmt = ' [%s]'%ds
-            br.select_form('login')        # finds the login form
+
-            br['EMAIL_ADDRESS']   = self.username    # fills the username
+        cover = ts.find('img', src=True)
-            br['PASSWORD'] = self.password        # fills the password
+        if cover is not None:
-            raw = br.submit().read()        # submit the form and read the 2nd login form
+            self.cover_url = 'http://www.microwavejournal.com' + cover['src']
-            # save it to an htm temp file (from ESPN recipe written by  Kovid Goyal kovid@kovidgoyal.net
+            self.log('Found Cover image:', self.cover_url)
-            with TemporaryFile(suffix='.htm') as fname:
+
-                with open(fname, 'wb') as f:
+        feeds = []
-                    f.write(raw)
+        seen_titles = set([]) # This is used to remove duplicant articles
-                br.open_local_file(fname)
+        sections = soup.find('div', attrs={'class':'box2 publication'})
-            br.select_form(nr=0)    # finds submit on the 2nd form
+        for section in sections.findAll('div', attrs={'class':'records'}):
-            didwelogin = br.submit().read()        # submit it and read the return html
+            section_title = self.tag_to_string(section.find('h3'))
-            if 'Welcome ' not in didwelogin:    # did it login successfully? Is Username/password correct?
+            self.log('Found section:', section_title)
-                raise Exception('Failed to login, are you sure your username and password are correct?')
+            articles = []
-            #login is done
+            for post in section.findAll('div', attrs={'class':'record'}):
-        return br
+                h = post.find('h2')
                title = self.tag_to_string(h)
                if title.find('The MWJ Puzzler') >=0: #Let's get rid of the useless Puzzler!
                    continue
                if title in seen_titles:
                    continue
                seen_titles.add(title)
                a = post.find('a', href=True)
                url = a['href']
                if url.startswith('/'):
                    url = 'http://www.microwavejournal.com'+url
                abstract = post.find('div', attrs={'class':'abstract'})
                p = abstract.find('p')
                desc = None
                self.log('\tFound article:', title, 'at', url)
                if p is not None:
                    desc = self.tag_to_string(p)
                    self.log('\t\t', desc)
                articles.append({'title':title, 'url':url, 'description':desc,
                    'date':self.timefmt})
            if articles:
                feeds.append((section_title, articles))
        return feeds
    def postprocess_html(self, soup, first):
        '''
        Convert the images referenced by the page to grayscale when enabled.

        With Convert_Grayscale false the soup is returned untouched. Otherwise
        every <img> that has a src is opened, switched to "GrayscaleType" and
        saved back to the same location. The 'first' argument is not used.
        '''
        if not self.Convert_Grayscale:
            return soup

        def has_image_source(node):
            return node.name.lower()=='img' and node.has_key('src')

        for image_tag in soup.findAll(has_image_source):
            location = image_tag['src']
            picture = Image()
            picture.open(location)
            # NOTE(review): the comparison of the Image object with 0 is kept
            # exactly as found; its intent is not clear from this file.
            if picture < 0:
                raise RuntimeError('Out of memory')
            picture.type = "GrayscaleType"
            picture.save(location)
        return soup
--- a/recipes/readitlater.recipe
+++ b/recipes/readitlater.recipe
@ -1,30 +1,36 @@
 """
 readitlaterlist.com
 """
 __license__   = 'GPL v3'
 __copyright__ = '''
 2010, Darko Miletic <darko.miletic at gmail.com>
 2011, Przemyslaw Kryger <pkryger at gmail.com>
-'''
+2012, tBunnyMan <Wag That Tail At Me dot com>
 '''
 readitlaterlist.com
 '''
 from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 class Readitlater(BasicNewsRecipe):
-    title                 = 'Read It Later'
+    title                 = 'ReadItLater'
-    __author__            = 'Darko Miletic, Przemyslaw Kryger'
+    __author__            = 'Darko Miletic, Przemyslaw Kryger, Keith Callenberg, tBunnyMan'
-    description           = '''Personalized news feeds. Go to readitlaterlist.com to
+    description           = '''Personalized news feeds. Go to readitlaterlist.com to setup \
-                               setup up your news. Fill in your account
+                            up your news. This version displays pages of articles from \
-                               username, and optionally you can add password.'''
+                            oldest to newest, with max & minimum counts, and marks articles \
-    publisher             = 'readitlater.com'
+                            read after downloading.'''
    publisher             = 'readitlaterlist.com'
    category              = 'news, custom'
    oldest_article        = 7
-    max_articles_per_feed = 100
+    max_articles_per_feed = 50
    minimum_articles      = 1
    no_stylesheets        = True
    use_embedded_content  = False
    needs_subscription    = True
    INDEX                 = u'http://readitlaterlist.com'
    LOGIN                 = INDEX + u'/l'
    readList              = []
    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -33,41 +39,46 @@ class Readitlater(BasicNewsRecipe):
            br.select_form(nr=0)
            br['feed_id'] = self.username
            if self.password is not None:
-               br['password'] = self.password
+                br['password'] = self.password
            br.submit()
        return br
    def get_feeds(self):
-        self.report_progress(0, ('Fetching list of feeds...'))
+        self.report_progress(0, ('Fetching list of pages...'))
        lfeeds = []
        i = 1
        feedurl = self.INDEX + u'/unread/1'
        while True:
            title = u'Unread articles, page ' + str(i)
-            lfeeds.append((title, feedurl))
+            lfeeds.insert(0, (title, feedurl))
-            self.report_progress(0, ('Got ') + str(i) + (' feeds'))
+            self.report_progress(0, ('Got ') + str(i) + (' pages'))
            i += 1
            soup = self.index_to_soup(feedurl)
-            ritem = soup.find('a',attrs={'id':'next', 'class':'active'})
+            ritem = soup.find('a', attrs={'id':'next', 'class':'active'})
            if ritem is None:
                break
            feedurl = self.INDEX + ritem['href']
        if self.test:
            return lfeeds[:2]
        return lfeeds
    def parse_index(self):
        totalfeeds = []
        articlesToGrab = self.max_articles_per_feed
        lfeeds = self.get_feeds()
        for feedobj in lfeeds:
            if articlesToGrab < 1:
                break
            feedtitle, feedurl = feedobj
            self.report_progress(0, ('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl))
            articles = []
            soup = self.index_to_soup(feedurl)
-            ritem = soup.find('ul',attrs={'id':'list'})
+            ritem = soup.find('ul', attrs={'id':'list'})
-            for item in ritem.findAll('li'):
+            for item in reversed(ritem.findAll('li')):
                if articlesToGrab < 1:
                    break
                else:
                    articlesToGrab -= 1
                description = ''
-                atag = item.find('a',attrs={'class':'text'})
+                atag = item.find('a', attrs={'class':'text'})
                if atag and atag.has_key('href'):
                    url         = self.INDEX + atag['href']
                    title       = self.tag_to_string(item.div)
@ -78,6 +89,20 @@ class Readitlater(BasicNewsRecipe):
                                     ,'url'        :url
                                     ,'description':description
                                    })
                    readLink = item.find('a', attrs={'class':'check'})['href']
                    self.readList.append(readLink)
            totalfeeds.append((feedtitle, articles))
        if len(self.readList) < self.minimum_articles:
            raise Exception("Not enough articles in RIL! Change minimum_articles or add more.")
        return totalfeeds
    def mark_as_read(self, markList):
        '''
        Open every link in markList with a browser from get_browser().

        markList holds hrefs collected by parse_index(); each one is appended
        to INDEX before it is requested. Presumably visiting the link marks
        the article as read on the site -- verify against the site.
        '''
        br = self.get_browser()
        for link in markList:
            # Only the request itself matters; the response is not used.
            br.open(self.INDEX + link)
    def cleanup(self):
        '''Pass the links collected in readList to mark_as_read().'''
        collected_links = self.readList
        self.mark_as_read(collected_links)
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -5,13 +5,14 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import os, glob, functools, re
 from calibre import guess_type
-from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
+from calibre.customize import (FileTypePlugin, MetadataReaderPlugin,
-    MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase
+    MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase)
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.html.to_zip import HTML2ZIP
 plugins = []
 # To archive plugins {{{
 class PML2PMLZ(FileTypePlugin):
@ -86,6 +87,8 @@ class TXT2TXTZ(FileTypePlugin):
        return list(set(images))
    def run(self, path_to_ebook):
        from calibre.ebooks.metadata.opf2 import metadata_to_opf
        with open(path_to_ebook, 'rb') as ebf:
            txt = ebf.read()
        base_dir = os.path.dirname(path_to_ebook)
@ -117,6 +120,7 @@ class TXT2TXTZ(FileTypePlugin):
            # No images so just import the TXT file.
            return path_to_ebook
 plugins += [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract,]
 # }}}
 # Metadata reader plugins {{{
@ -399,6 +403,10 @@ class ZipMetadataReader(MetadataReaderPlugin):
    def get_metadata(self, stream, ftype):
        '''
        Read metadata from stream with calibre.ebooks.metadata.zip.

        ftype is accepted but not used by this reader.
        '''
        # Imported when called rather than when this module is loaded.
        from calibre.ebooks.metadata.zip import get_metadata as read_zip_metadata
        return read_zip_metadata(stream)
 # Register every class currently in this module's namespace whose name
 # ends with 'MetadataReader'.
 plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
                                        x.__name__.endswith('MetadataReader')]
 # }}}
 # Metadata writer plugins {{{
@ -499,107 +507,51 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
        from calibre.ebooks.metadata.extz import set_metadata
        set_metadata(stream, mi)
-# }}}
+plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
-
+                                        x.__name__.endswith('MetadataWriter')]
 from calibre.ebooks.comic.input import ComicInput
 from calibre.ebooks.djvu.input import DJVUInput
 from calibre.ebooks.epub.input import EPUBInput
 from calibre.ebooks.fb2.input import FB2Input
 from calibre.ebooks.html.input import HTMLInput
 from calibre.ebooks.htmlz.input import HTMLZInput
 from calibre.ebooks.lit.input import LITInput
 from calibre.ebooks.mobi.input import MOBIInput
 from calibre.ebooks.odt.input import ODTInput
 from calibre.ebooks.pdb.input import PDBInput
 from calibre.ebooks.azw4.input import AZW4Input
 from calibre.ebooks.pdf.input import PDFInput
 from calibre.ebooks.pml.input import PMLInput
 from calibre.ebooks.rb.input import RBInput
 from calibre.web.feeds.input import RecipeInput
 from calibre.ebooks.rtf.input import RTFInput
 from calibre.ebooks.tcr.input import TCRInput
 from calibre.ebooks.txt.input import TXTInput
 from calibre.ebooks.lrf.input import LRFInput
 from calibre.ebooks.chm.input import CHMInput
 from calibre.ebooks.snb.input import SNBInput
 from calibre.ebooks.epub.output import EPUBOutput
 from calibre.ebooks.fb2.output import FB2Output
 from calibre.ebooks.lit.output import LITOutput
 from calibre.ebooks.lrf.output import LRFOutput
 from calibre.ebooks.mobi.output import MOBIOutput
 from calibre.ebooks.oeb.output import OEBOutput
 from calibre.ebooks.pdb.output import PDBOutput
 from calibre.ebooks.pdf.output import PDFOutput
 from calibre.ebooks.pml.output import PMLOutput
 from calibre.ebooks.rb.output import RBOutput
 from calibre.ebooks.rtf.output import RTFOutput
 from calibre.ebooks.tcr.output import TCROutput
 from calibre.ebooks.txt.output import TXTOutput
 from calibre.ebooks.txt.output import TXTZOutput
 from calibre.ebooks.html.output import HTMLOutput
 from calibre.ebooks.htmlz.output import HTMLZOutput
 from calibre.ebooks.snb.output import SNBOutput
 from calibre.customize.profiles import input_profiles, output_profiles
 from calibre.devices.apple.driver import ITUNES
 from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
 from calibre.devices.blackberry.driver import BLACKBERRY, PLAYBOOK
 from calibre.devices.cybook.driver import CYBOOK, ORIZON
 from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK,
                POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK,
                BOOQ, ELONEX, POCKETBOOK301, MENTOR, POCKETBOOK602,
                POCKETBOOK701, POCKETBOOK360P, PI2)
 from calibre.devices.iliad.driver import ILIAD
 from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
 from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
 from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
        KINDLE_FIRE)
 from calibre.devices.nook.driver import NOOK, NOOK_COLOR
 from calibre.devices.prs505.driver import PRS505
 from calibre.devices.prst1.driver import PRST1
 from calibre.devices.user_defined.driver import USER_DEFINED
 from calibre.devices.android.driver import ANDROID, S60, WEBOS
 from calibre.devices.nokia.driver import N770, N810, E71X, E52
 from calibre.devices.eslick.driver import ESLICK, EBK52
 from calibre.devices.nuut2.driver import NUUT2
 from calibre.devices.iriver.driver import IRIVER_STORY
 from calibre.devices.binatone.driver import README
 from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK,
        LIBREAIR, ODYSSEY)
 from calibre.devices.edge.driver import EDGE
 from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
        SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER)
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
        TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY, EX124G)
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK
 from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX
 from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
 from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
        Epubcheck, ]
 # New metadata download plugins {{{
 from calibre.ebooks.metadata.sources.google import GoogleBooks
 from calibre.ebooks.metadata.sources.amazon import Amazon
 from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
 from calibre.ebooks.metadata.sources.isbndb import ISBNDB
 from calibre.ebooks.metadata.sources.overdrive import OverDrive
 from calibre.ebooks.metadata.sources.douban import Douban
 from calibre.ebooks.metadata.sources.ozon import Ozon
 plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
 # }}}
 # Conversion plugins {{{
 from calibre.ebooks.conversion.plugins.comic_input import ComicInput
 from calibre.ebooks.conversion.plugins.djvu_input import DJVUInput
 from calibre.ebooks.conversion.plugins.epub_input import EPUBInput
 from calibre.ebooks.conversion.plugins.fb2_input import FB2Input
 from calibre.ebooks.conversion.plugins.html_input import HTMLInput
 from calibre.ebooks.conversion.plugins.htmlz_input import HTMLZInput
 from calibre.ebooks.conversion.plugins.lit_input import LITInput
 from calibre.ebooks.conversion.plugins.mobi_input import MOBIInput
 from calibre.ebooks.conversion.plugins.odt_input import ODTInput
 from calibre.ebooks.conversion.plugins.pdb_input import PDBInput
 from calibre.ebooks.conversion.plugins.azw4_input import AZW4Input
 from calibre.ebooks.conversion.plugins.pdf_input import PDFInput
 from calibre.ebooks.conversion.plugins.pml_input import PMLInput
 from calibre.ebooks.conversion.plugins.rb_input import RBInput
 from calibre.ebooks.conversion.plugins.recipe_input import RecipeInput
 from calibre.ebooks.conversion.plugins.rtf_input import RTFInput
 from calibre.ebooks.conversion.plugins.tcr_input import TCRInput
 from calibre.ebooks.conversion.plugins.txt_input import TXTInput
 from calibre.ebooks.conversion.plugins.lrf_input import LRFInput
 from calibre.ebooks.conversion.plugins.chm_input import CHMInput
 from calibre.ebooks.conversion.plugins.snb_input import SNBInput
 from calibre.ebooks.conversion.plugins.epub_output import EPUBOutput
 from calibre.ebooks.conversion.plugins.fb2_output import FB2Output
 from calibre.ebooks.conversion.plugins.lit_output import LITOutput
 from calibre.ebooks.conversion.plugins.lrf_output import LRFOutput
 from calibre.ebooks.conversion.plugins.mobi_output import MOBIOutput
 from calibre.ebooks.conversion.plugins.oeb_output import OEBOutput
 from calibre.ebooks.conversion.plugins.pdb_output import PDBOutput
 from calibre.ebooks.conversion.plugins.pdf_output import PDFOutput
 from calibre.ebooks.conversion.plugins.pml_output import PMLOutput
 from calibre.ebooks.conversion.plugins.rb_output import RBOutput
 from calibre.ebooks.conversion.plugins.rtf_output import RTFOutput
 from calibre.ebooks.conversion.plugins.tcr_output import TCROutput
 from calibre.ebooks.conversion.plugins.txt_output import TXTOutput, TXTZOutput
 from calibre.ebooks.conversion.plugins.html_output import HTMLOutput
 from calibre.ebooks.conversion.plugins.htmlz_output import HTMLZOutput
 from calibre.ebooks.conversion.plugins.snb_output import SNBOutput
 plugins += [
    ComicInput,
    DJVUInput,
@ -642,6 +594,66 @@ plugins += [
    HTMLZOutput,
    SNBOutput,
 ]
 # }}}
 # Catalog plugins {{{
 from calibre.library.catalogs.csv_xml import CSV_XML
 from calibre.library.catalogs.bibtex import BIBTEX
 from calibre.library.catalogs.epub_mobi import EPUB_MOBI
 plugins += [CSV_XML, BIBTEX, EPUB_MOBI]
 # }}}
 # EPUB Fix plugins {{{
 from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 plugins += [Unmanifested, Epubcheck]
 # }}}
 # Profiles {{{
 from calibre.customize.profiles import input_profiles, output_profiles
 plugins += input_profiles + output_profiles
 # }}}
 # Device driver plugins {{{
 from calibre.devices.apple.driver import ITUNES
 from calibre.devices.hanlin.driver import HANLINV3, HANLINV5, BOOX, SPECTRA
 from calibre.devices.blackberry.driver import BLACKBERRY, PLAYBOOK
 from calibre.devices.cybook.driver import CYBOOK, ORIZON
 from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK,
                POCKETBOOK360, GER2, ITALICA, ECLICTO, DBOOK, INVESBOOK,
                BOOQ, ELONEX, POCKETBOOK301, MENTOR, POCKETBOOK602,
                POCKETBOOK701, POCKETBOOK360P, PI2)
 from calibre.devices.iliad.driver import ILIAD
 from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
 from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
 from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
        KINDLE_FIRE)
 from calibre.devices.nook.driver import NOOK, NOOK_COLOR
 from calibre.devices.prs505.driver import PRS505
 from calibre.devices.prst1.driver import PRST1
 from calibre.devices.user_defined.driver import USER_DEFINED
 from calibre.devices.android.driver import ANDROID, S60, WEBOS
 from calibre.devices.nokia.driver import N770, N810, E71X, E52
 from calibre.devices.eslick.driver import ESLICK, EBK52
 from calibre.devices.nuut2.driver import NUUT2
 from calibre.devices.iriver.driver import IRIVER_STORY
 from calibre.devices.binatone.driver import README
 from calibre.devices.hanvon.driver import (N516, EB511, ALEX, AZBOOKA, THEBOOK,
        LIBREAIR, ODYSSEY)
 from calibre.devices.edge.driver import EDGE
 from calibre.devices.teclast.driver import (TECLAST_K3, NEWSMY, IPAPYRUS,
        SOVOS, PICO, SUNSTECH_EB700, ARCHOS7O, STASH, WEXLER)
 from calibre.devices.sne.driver import SNE
 from calibre.devices.misc import (PALMPRE, AVANT, SWEEX, PDNOVEL,
        GEMEI, VELOCITYMICRO, PDNOVEL_KOBO, LUMIREAD, ALURATEK_COLOR,
        TREKSTOR, EEEREADER, NEXTBOOK, ADAM, MOOVYBOOK, COBY, EX124G)
 from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK
 from calibre.devices.boeye.driver import BOEYE_BEX, BOEYE_BDX
 # Order here matters. The first matched device is the one used.
 plugins += [
    HANLINV3,
@ -716,11 +728,20 @@ plugins += [
    BOEYE_BDX,
    USER_DEFINED,
 ]
-plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
+# }}}
-                                        x.__name__.endswith('MetadataReader')]
+
-plugins += [x for x in list(locals().values()) if isinstance(x, type) and \
+# New metadata download plugins {{{
-                                        x.__name__.endswith('MetadataWriter')]
+from calibre.ebooks.metadata.sources.google import GoogleBooks
-plugins += input_profiles + output_profiles
+from calibre.ebooks.metadata.sources.amazon import Amazon
 from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
 from calibre.ebooks.metadata.sources.isbndb import ISBNDB
 from calibre.ebooks.metadata.sources.overdrive import OverDrive
 from calibre.ebooks.metadata.sources.douban import Douban
 from calibre.ebooks.metadata.sources.ozon import Ozon
 plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB, OverDrive, Douban, Ozon]
 # }}}
 # Interface Actions {{{
@ -1623,3 +1644,34 @@ plugins += [
 ]
 # }}}
 if __name__ == '__main__':
    # Test load speed
    # The check runs in a child interpreter so that its sys.modules only
    # reflects what importing calibre.customize.builtins pulled in.
    import subprocess, textwrap
    try:
        subprocess.check_call(['python', '-c', textwrap.dedent(
        '''
        from __future__ import print_function
        import time, sys, init_calibre
        st = time.time()
        import calibre.customize.builtins
        t = time.time() - st
        ret = 0
        for x in ('lxml', 'calibre.ebooks.BeautifulSoup', 'uuid',
            'calibre.utils.terminfo', 'calibre.utils.magick', 'PIL', 'Image',
            'sqlite3', 'mechanize', 'httplib', 'xml'):
            if x in sys.modules:
                ret = 1
                print (x, 'has been loaded by a plugin')
        if ret:
            print ('\\nA good way to track down what is loading something is to run'
            ' python -c "import init_calibre; import calibre.customize.builtins"')
            print()
        print ('Time taken to import all plugins: %.2f'%t)
        sys.exit(ret)
        ''')])
    except subprocess.CalledProcessError:
        # The child exits with status 1 when any of the listed modules was
        # found in sys.modules; propagate that as our own exit status.
        raise SystemExit(1)
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@ -5,7 +5,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from itertools import izip
 from xml.sax.saxutils import escape
 from calibre.customize import Plugin as _Plugin
@ -268,6 +267,7 @@ class OutputProfile(Plugin):
    @classmethod
    def tags_to_string(cls, tags):
        from xml.sax.saxutils import escape
        return escape(', '.join(tags))
 class iPadOutput(OutputProfile):
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@ -447,11 +447,14 @@ def plugin_for_catalog_format(fmt):
 # }}}
-def device_plugins(): # {{{
+def device_plugins(include_disabled=False): # {{{
    for plugin in _initialized_plugins:
        if isinstance(plugin, DevicePlugin):
-            if not is_disabled(plugin):
+            if include_disabled or not is_disabled(plugin):
                if platform in plugin.supported_platforms:
                    if getattr(plugin, 'plugin_needs_delayed_initialization',
                            False):
                        plugin.do_delayed_plugin_initialization()
                    yield plugin
 # }}}
@ -496,7 +499,7 @@ def initialize_plugin(plugin, path_to_zip_file):
 def has_external_plugins():
    '''Return True if config['plugins'] is non-empty, False otherwise.'''
    return True if config['plugins'] else False
-def initialize_plugins():
+def initialize_plugins(perf=False):
    global _initialized_plugins
    _initialized_plugins = []
    conflicts = [name for name in config['plugins'] if name in
@ -504,6 +507,11 @@ def initialize_plugins():
    for p in conflicts:
        remove_plugin(p)
    external_plugins = config['plugins']
    ostdout, ostderr = sys.stdout, sys.stderr
    if perf:
        from collections import defaultdict
        import time
        times = defaultdict(lambda:0)
    for zfp in list(external_plugins) + builtin_plugins:
        try:
            if not isinstance(zfp, type):
@ -516,12 +524,22 @@ def initialize_plugins():
                plugin = load_plugin(zfp) if not isinstance(zfp, type) else zfp
            except PluginNotFound:
                continue
            if perf:
                st = time.time()
            plugin = initialize_plugin(plugin, None if isinstance(zfp, type) else zfp)
            if perf:
                times[plugin.name] = time.time() - st
            _initialized_plugins.append(plugin)
        except:
            print 'Failed to initialize plugin:', repr(zfp)
            if DEBUG:
                traceback.print_exc()
    # Prevent a custom plugin from overriding stdout/stderr as this breaks
    # ipython
    sys.stdout, sys.stderr = ostdout, ostderr
    if perf:
        for x in sorted(times, key=lambda x:times[x]):
            print ('%50s: %.3f'%(x, times[x]))
    _initialized_plugins.sort(cmp=lambda x,y:cmp(x.priority, y.priority), reverse=True)
    reread_filetype_plugins()
    reread_metadata_plugins()
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -221,6 +221,20 @@ class ANDROID(USBMS):
                drives['main']  = letter_a
        return drives
    @classmethod
    def configure_for_kindle_app(cls):
        '''
        Write Kindle-app settings into this driver's config proxy: a fixed
        format list, no sub-directories, and 'kindle' placed in front of the
        driver's EBOOK_DIR_MAIN entries as the comma separated
        extra_customization value.
        '''
        settings = cls._configProxy()
        settings['format_map'] = ['mobi', 'azw', 'azw1', 'azw4', 'pdf']
        settings['use_subdirs'] = False
        kindle_dirs = ['kindle'] + cls.EBOOK_DIR_MAIN
        settings['extra_customization'] = ','.join(kindle_dirs)
    @classmethod
    def configure_for_generic_epub_app(cls):
        '''
        Delete the format_map, use_subdirs and extra_customization entries
        from this driver's config proxy, in that order.
        '''
        settings = cls._configProxy()
        for key in ('format_map', 'use_subdirs', 'extra_customization'):
            del settings[key]
 class S60(USBMS):
    name = 'S60 driver'
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
--- a/src/calibre/devices/bambook/libbambookcore.py
+++ b/src/calibre/devices/bambook/libbambookcore.py
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
 Sanda library wrapper
 '''
-import ctypes, uuid, hashlib, os, sys
+import ctypes, hashlib, os, sys
 from threading import Event, Lock
 from calibre.constants import iswindows
 from calibre import load_library
@ -350,6 +350,7 @@ class Bambook:
        return None
    def SendFile(self, fileName, guid = None):
        import uuid
        if self.handle:
            taskID = job.NewJob()
            if guid:
--- a/src/calibre/devices/cybook/t2b.py
+++ b/src/calibre/devices/cybook/t2b.py
--- a/src/calibre/devices/folder_device/driver.py
+++ b/src/calibre/devices/folder_device/driver.py
@ -97,3 +97,13 @@ class FOLDER_DEVICE(USBMS):
    @classmethod
    def settings(self):
        # Settings are read from FOLDER_DEVICE_FOR_CONFIG rather than from
        # this class: build its config object and return the parsed result.
        return FOLDER_DEVICE_FOR_CONFIG._config().parse()
    @classmethod
    def config_widget(cls):
        # Delegate to FOLDER_DEVICE_FOR_CONFIG and return whatever widget it
        # provides.
        return FOLDER_DEVICE_FOR_CONFIG.config_widget()
    @classmethod
    def save_settings(cls, config_widget):
        # Delegate to FOLDER_DEVICE_FOR_CONFIG, passing the widget through
        # unchanged and returning its result.
        return FOLDER_DEVICE_FOR_CONFIG.save_settings(config_widget)
--- a/src/calibre/devices/kindle/apnx.py
+++ b/src/calibre/devices/kindle/apnx.py
@ -9,7 +9,6 @@ Generates and writes an APNX page mapping file.
 '''
 import struct
 import uuid
 from calibre.ebooks.mobi.reader import MobiReader
 from calibre.ebooks.pdb.header import PdbHeaderReader
@ -51,6 +50,7 @@ class APNXBuilder(object):
            apnxf.write(apnx)
    def generate_apnx(self, pages):
        import uuid
        apnx = ''
        content_vals = {
--- a/src/calibre/devices/kindle/driver.py
+++ b/src/calibre/devices/kindle/driver.py
@ -10,10 +10,8 @@ Device driver for Amazon's Kindle
 import datetime, os, re, sys, json, hashlib
 from calibre.devices.kindle.apnx import APNXBuilder
 from calibre.devices.kindle.bookmark import Bookmark
 from calibre.devices.usbms.driver import USBMS
 from calibre.ebooks.metadata import MetaInformation
 from calibre import strftime
 '''
@ -152,6 +150,7 @@ class KINDLE(USBMS):
        path_map, book_ext = resolve_bookmark_paths(storage, path_map)
        bookmarked_books = {}
        for id in path_map:
            bookmark_ext = path_map[id].rpartition('.')[2]
            myBookmark = Bookmark(path_map[id], id, book_ext[id], bookmark_ext)
@ -236,6 +235,8 @@ class KINDLE(USBMS):
    def add_annotation_to_library(self, db, db_id, annotation):
        from calibre.ebooks.BeautifulSoup import Tag
        from calibre.ebooks.metadata import MetaInformation
        bm = annotation
        ignore_tags = set(['Catalog', 'Clippings'])
@ -363,6 +364,8 @@ class KINDLE2(KINDLE):
        '''
        Hijacking this function to write the apnx file.
        '''
        from calibre.devices.kindle.apnx import APNXBuilder
        opts = self.settings()
        if not opts.extra_customization[self.OPT_APNX]:
            return
--- a/src/calibre/devices/kobo/bookmark.py
+++ b/src/calibre/devices/kobo/bookmark.py
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
 import os
 from contextlib import closing
 import sqlite3 as sqlite
 class Bookmark(): # {{{
    '''
@ -32,7 +31,7 @@ class Bookmark(): # {{{
    def get_bookmark_data(self):
        ''' Return the timestamp and last_read_location '''
-
+        import sqlite3 as sqlite
        user_notes = {}
        self.timestamp = os.path.getmtime(self.path)
        with closing(sqlite.connect(self.db_path)) as connection:
--- a/src/calibre/devices/kobo/driver.py
+++ b/src/calibre/devices/kobo/driver.py
@ -6,7 +6,6 @@ __copyright__ = '2010, Timothy Legge <timlegge@gmail.com> and Kovid Goyal <kovid
 __docformat__ = 'restructuredtext en'
 import os, time, calendar
 import sqlite3 as sqlite
 from contextlib import closing
 from calibre.devices.usbms.books import BookList
 from calibre.devices.kobo.books import Book
@ -16,7 +15,6 @@ from calibre.devices.mime import mime_type_ext
 from calibre.devices.usbms.driver import USBMS, debug_print
 from calibre import prints
 from calibre.devices.usbms.books import CollectionsBookList
 from calibre.utils.magick.draw import save_cover_data_to
 from calibre.ptempfile import PersistentTemporaryFile
 class KOBO(USBMS):
@ -230,6 +228,7 @@ class KOBO(USBMS):
                traceback.print_exc()
            return changed
        import sqlite3 as sqlite
        with closing(sqlite.connect(
            self.normalize_path(self._main_prefix +
                '.kobo/KoboReader.sqlite'))) as connection:
@ -344,6 +343,7 @@ class KOBO(USBMS):
        #    2) volume_shortcovers
        #    3) content
        import sqlite3 as sqlite
        debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
        with closing(sqlite.connect(self.normalize_path(self._main_prefix +
            '.kobo/KoboReader.sqlite'))) as connection:
@ -739,6 +739,8 @@ class KOBO(USBMS):
        # Needs to be outside books collection as in the case of removing
        # the last book from the collection the list of books is empty
        # and the removal of the last book would not occur
        import sqlite3 as sqlite
        with closing(sqlite.connect(self.normalize_path(self._main_prefix +
            '.kobo/KoboReader.sqlite'))) as connection:
@ -850,6 +852,7 @@ class KOBO(USBMS):
            debug_print('FAILED to upload cover', filepath)
    def _upload_cover(self, path, filename, metadata, filepath, uploadgrayscale):
        from calibre.utils.magick.draw import save_cover_data_to
        if metadata.cover:
            cover = self.normalize_path(metadata.cover.replace('/', os.sep))
@ -859,6 +862,7 @@ class KOBO(USBMS):
                ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(filepath)
                ContentID = self.contentid_from_path(filepath, ContentType)
                import sqlite3 as sqlite
                with closing(sqlite.connect(self.normalize_path(self._main_prefix +
                    '.kobo/KoboReader.sqlite'))) as connection:
--- a/src/calibre/devices/prs505/sony_cache.py
+++ b/src/calibre/devices/prs505/sony_cache.py
@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
 import os, time
 from base64 import b64decode
 from uuid import uuid4
 from lxml import etree
 from datetime import date
 from calibre import prints, guess_type, isbytestring
@ -78,6 +76,7 @@ def strftime(epoch, zone=time.localtime):
    return ' '.join(src)
 def uuid():
    '''
    Return a freshly generated random UUID as upper-case text with only its
    first hyphen removed.
    '''
    from uuid import uuid4
    text = str(uuid4())
    text = text.replace('-', '', 1)
    return text.upper()
 # }}}
@ -85,6 +84,8 @@ def uuid():
 class XMLCache(object):
    def __init__(self, paths, ext_paths, prefixes, use_author_sort):
        from lxml import etree
        if DEBUG:
            debug_print('Building XMLCache...', paths)
        self.paths = paths
@ -714,6 +715,8 @@ class XMLCache(object):
    def write(self):
        from lxml import etree
        for i, path in self.paths.items():
            self.move_playlists_to_bottom()
            self.cleanup_whitespace(i)
--- a/src/calibre/devices/prst1/driver.py
+++ b/src/calibre/devices/prst1/driver.py
@ -12,8 +12,6 @@ Device driver for the SONY T1 devices
 '''
 import os, time, re
 import sqlite3 as sqlite
 from sqlite3 import DatabaseError
 from contextlib import closing
 from datetime import date
@ -146,6 +144,8 @@ class PRST1(USBMS):
        return True
    def books(self, oncard=None, end_session=True):
        import sqlite3 as sqlite
        dummy_bl = BookList(None, None, None)
        if (
@ -246,6 +246,8 @@ class PRST1(USBMS):
        debug_print('PRST1: finished sync_booklists')
    def update_device_database(self, booklist, collections_attributes, oncard):
        import sqlite3 as sqlite
        debug_print('PRST1: starting update_device_database')
        plugboard = None
@ -274,6 +276,8 @@ class PRST1(USBMS):
    def update_device_books(self, connection, booklist, source_id, plugboard,
            dbpath):
        from sqlite3 import DatabaseError
        opts = self.settings()
        upload_covers = opts.extra_customization[self.OPT_UPLOAD_COVERS]
        refresh_covers = opts.extra_customization[self.OPT_REFRESH_COVERS]
@ -489,6 +493,8 @@ class PRST1(USBMS):
        debug_print('PRS-T1: finished rebuild_collections')
    def upload_cover(self, path, filename, metadata, filepath):
        import sqlite3 as sqlite
        debug_print('PRS-T1: uploading cover')
        if filepath.startswith(self._main_prefix):
--- a/src/calibre/devices/usbms/driver.py
+++ b/src/calibre/devices/usbms/driver.py
@ -10,7 +10,7 @@ driver. It is intended to be subclassed with the relevant parts implemented
 for a particular device.
 '''
-import os, re, time, json, uuid, functools, shutil
+import os, re, time, json, functools, shutil
 from itertools import cycle
 from calibre.constants import numeric_version
@ -58,6 +58,7 @@ class USBMS(CLI, Device):
    SCAN_FROM_ROOT = False
    def _update_driveinfo_record(self, dinfo, prefix, location_code, name=None):
        import uuid
        if not isinstance(dinfo, dict):
            dinfo = {}
        if dinfo.get('device_store_uuid', None) is None:
--- a/src/calibre/devices/user_defined/driver.py
+++ b/src/calibre/devices/user_defined/driver.py
@ -90,6 +90,10 @@ class USER_DEFINED(USBMS):
    OPT_CARD_A_FOLDER           = 9
    def initialize(self):
        # Flag the plugin so that the device-specific setup is performed
        # later by do_delayed_plugin_initialization(), then run the base
        # class initialization.
        self.plugin_needs_delayed_initialization = True
        USBMS.initialize(self)
    def do_delayed_plugin_initialization(self):
        try:
            e = self.settings().extra_customization
            self.VENDOR_ID          = int(e[self.OPT_USB_VENDOR_ID], 16)
@ -107,4 +111,6 @@ class USER_DEFINED(USBMS):
        except:
            import traceback
            traceback.print_exc()
-        USBMS.initialize(self)
+        self.plugin_needs_delayed_initialization = False
--- a/src/calibre/ebooks/chardet.py
+++ b/src/calibre/ebooks/chardet.py
@ -8,7 +8,6 @@ __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import re, codecs
 from chardet import detect
 ENCODING_PATS = [
                 re.compile(r'<\?[^<>]+encoding\s*=\s*[\'"](.*?)[\'"][^<>]*>',
@ -34,8 +33,13 @@ def substitute_entites(raw):
 _CHARSET_ALIASES = { "macintosh" : "mac-roman",
                        "x-sjis" : "shift-jis" }
 def detect(*args, **kwargs):
    # Thin wrapper that imports chardet only when called, so importing this
    # module does not import chardet. All arguments are passed through
    # unchanged and chardet's result is returned.
    from chardet import detect
    return detect(*args, **kwargs)
 def force_encoding(raw, verbose, assume_utf8=False):
    from calibre.constants import preferred_encoding
    try:
        chardet = detect(raw[:1024*50])
    except:
--- a/src/calibre/ebooks/comic/input.py
+++ b/src/calibre/ebooks/comic/input.py
@ -7,11 +7,10 @@ __docformat__ = 'restructuredtext en'
 Based on ideas from comiclrf created by FangornUK.
 '''
-import os, shutil, traceback, textwrap, time, codecs
+import os, traceback, time
 from Queue import Empty
-from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
+from calibre import extract, prints, walk
 from calibre import extract, CurrentDir, prints, walk
 from calibre.constants import filesystem_encoding
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.utils.ipc.server import Server
@ -273,245 +272,4 @@ def process_pages(pages, opts, update, tdir):
    return ans, failures
 class ComicInput(InputFormatPlugin):
    name        = 'Comic Input'
    author      = 'Kovid Goyal'
    description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
    file_types  = set(['cbz', 'cbr', 'cbc'])
    is_image_collection = True
    core_usage = -1
    options = set([
        OptionRecommendation(name='colors', recommended_value=256,
            help=_('Number of colors for grayscale image conversion. Default: '
                '%default. Values of less than 256 may result in blurred text '
                'on your device if you are creating your comics in EPUB format.')),
        OptionRecommendation(name='dont_normalize', recommended_value=False,
            help=_('Disable normalize (improve contrast) color range '
            'for pictures. Default: False')),
        OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
            help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
        OptionRecommendation(name='dont_sharpen', recommended_value=False,
            help=_('Disable sharpening.')),
        OptionRecommendation(name='disable_trim', recommended_value=False,
            help=_('Disable trimming of comic pages. For some comics, '
                     'trimming might remove content as well as borders.')),
        OptionRecommendation(name='landscape', recommended_value=False,
            help=_("Don't split landscape images into two portrait images")),
        OptionRecommendation(name='wide', recommended_value=False,
            help=_("Keep aspect ratio and scale image using screen height as "
            "image width for viewing in landscape mode.")),
        OptionRecommendation(name='right2left', recommended_value=False,
              help=_('Used for right-to-left publications like manga. '
              'Causes landscape pages to be split into portrait pages '
              'from right to left.')),
        OptionRecommendation(name='despeckle', recommended_value=False,
              help=_('Enable Despeckle. Reduces speckle noise. '
              'May greatly increase processing time.')),
        OptionRecommendation(name='no_sort', recommended_value=False,
              help=_("Don't sort the files found in the comic "
              "alphabetically by name. Instead use the order they were "
              "added to the comic.")),
        OptionRecommendation(name='output_format', choices=['png', 'jpg'],
            recommended_value='png', help=_('The format that images in the created ebook '
                'are converted to. You can experiment to see which format gives '
                'you optimal size and look on your device.')),
        OptionRecommendation(name='no_process', recommended_value=False,
              help=_("Apply no processing to the image")),
        OptionRecommendation(name='dont_grayscale', recommended_value=False,
            help=_('Do not convert the image to grayscale (black and white)')),
        OptionRecommendation(name='comic_image_size', recommended_value=None,
            help=_('Specify the image size as widthxheight pixels. Normally,'
                ' an image size is automatically calculated from the output '
                'profile, this option overrides it.')),
        OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
            help=_('When converting a CBC do not add links to each page to'
                ' the TOC. Note this only applies if the TOC has more than one'
                ' section')),
        ])
    recommendations = set([
        ('margin_left', 0, OptionRecommendation.HIGH),
        ('margin_top',  0, OptionRecommendation.HIGH),
        ('margin_right', 0, OptionRecommendation.HIGH),
        ('margin_bottom', 0, OptionRecommendation.HIGH),
        ('insert_blank_line', False, OptionRecommendation.HIGH),
        ('remove_paragraph_spacing',  False, OptionRecommendation.HIGH),
        ('change_justification', 'left', OptionRecommendation.HIGH),
        ('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
        ('chapter', None, OptionRecommendation.HIGH),
        ('page_breaks_brefore', None, OptionRecommendation.HIGH),
        ('use_auto_toc', False, OptionRecommendation.HIGH),
        ('page_breaks_before', None, OptionRecommendation.HIGH),
        ('disable_font_rescaling', True, OptionRecommendation.HIGH),
        ('linearize_tables', False, OptionRecommendation.HIGH),
        ])
    def get_comics_from_collection(self, stream):
        '''
        Extract the zipped collection in `stream` into a persistent
        temporary directory and read its comics.txt index.

        Each non-empty line of comics.txt has the form ``filename:title``.
        When the title is missing, the file's base name without extension is
        used instead. Entries whose file is not readable are skipped.

        Returns a list of ``[title, absolute_path]`` pairs. Raises ValueError
        if comics.txt is missing or no readable comic is listed.
        '''
        from calibre.libunzip import extract as zipextract
        tdir = PersistentTemporaryDirectory('_comic_collection')
        zipextract(stream, tdir)
        comics = []
        with CurrentDir(tdir):
            if not os.path.exists('comics.txt'):
                raise ValueError((
                    '%s is not a valid comic collection'
                    ' no comics.txt was found in the file')
                        %stream.name)
            # Close the index file deterministically instead of relying on
            # garbage collection
            with open('comics.txt', 'rb') as f:
                raw = f.read()
            # Decode according to the BOM (dropping it), defaulting to UTF-8
            if raw.startswith(codecs.BOM_UTF16_BE):
                raw = raw.decode('utf-16-be')[1:]
            elif raw.startswith(codecs.BOM_UTF16_LE):
                raw = raw.decode('utf-16-le')[1:]
            elif raw.startswith(codecs.BOM_UTF8):
                raw = raw.decode('utf-8')[1:]
            else:
                raw = raw.decode('utf-8')
            for line in raw.splitlines():
                line = line.strip()
                if not line:
                    continue
                fname, sep, title = line.partition(':')
                fname = fname.replace('#', '_')
                fname = os.path.join(tdir, *fname.split('/'))
                if not title:
                    title = os.path.basename(fname).rpartition('.')[0]
                if os.access(fname, os.R_OK):
                    comics.append([title, fname])
        if not comics:
            raise ValueError('%s has no comics'%stream.name)
        return comics
    def get_pages(self, comic, tdir2):
        '''
        Extract `comic` and return the list of page files placed in `tdir2`.

        With the no_process option the pages found are copied to `tdir2`
        unchanged; otherwise they are run through process_pages() and any
        failures are logged as warnings.

        Raises ValueError if the comic contains no pages, or if processing
        leaves no valid pages.
        '''
        tdir  = extract_comic(comic)
        new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
                verbose=self.opts.verbose)
        if not new_pages:
            raise ValueError('Could not find any pages in the comic: %s'
                    %comic)
        if self.opts.no_process:
            n2 = []
            for page in new_pages:
                n2.append(os.path.join(tdir2, os.path.basename(page)))
                shutil.copyfile(page, n2[-1])
            new_pages = n2
        else:
            new_pages, failures = process_pages(new_pages, self.opts,
                    self.report_progress, tdir2)
            if failures:
                self.log.warning('Could not process the following pages '
                '(run with --verbose to see why):')
                for f in failures:
                    self.log.warning('\t', f)
            if not new_pages:
                raise ValueError('Could not find any valid pages in comic: %s'
                        % comic)
        return new_pages
    def get_images(self):
        # self._images is populated by convert() with the page files of
        # every comic that was converted.
        return self._images
    def convert(self, stream, opts, file_ext, log, accelerators):
        '''
        Convert the comic (or, for 'cbc', the comic collection) in `stream`
        into page images plus XHTML wrappers in the current directory, and
        write metadata.opf and toc.ncx describing them.

        Returns the absolute path of the generated metadata.opf. Raises
        ValueError if no comic pages were found.
        '''
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC

        self.opts, self.log= opts, log
        if file_ext == 'cbc':
            comics_ = self.get_comics_from_collection(stream)
        else:
            comics_ = [['Comic', os.path.abspath(stream.name)]]
        stream.close()
        comics = []
        for i, x in enumerate(comics_):
            title, fname = x
            # A single comic is placed directly in the current directory,
            # several comics each get their own comic_N sub-directory
            cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
            cdir = os.path.abspath(cdir)
            if not os.path.exists(cdir):
                os.makedirs(cdir)
            pages = self.get_pages(fname, cdir)
            if not pages: continue
            wrappers = self.create_wrappers(pages)
            comics.append((title, pages, wrappers))

        if not comics:
            raise ValueError('No comic pages found in %s'%stream.name)

        mi  = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
            [_('Unknown')])
        opf = OPFCreator(os.path.abspath('.'), mi)
        entries = []

        def href(x):
            # Path of x relative to the OPF: just the file name for a single
            # comic, otherwise '<comic dir>/<file name>'
            if len(comics) == 1: return os.path.basename(x)
            return '/'.join(x.split(os.sep)[-2:])

        for comic in comics:
            pages, wrappers = comic[1:]
            entries += [(w, None) for w in map(href, wrappers)] + \
                    [(x, None) for x in map(href, pages)]
        opf.create_manifest(entries)
        spine = []
        for comic in comics:
            spine.extend(map(href, comic[2]))
        self._images = []
        for comic in comics:
            self._images.extend(comic[1])
        opf.create_spine(spine)
        toc = TOC()
        if len(comics) == 1:
            wrappers = comics[0][2]
            for i, x in enumerate(wrappers):
                toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                        play_order=i)
        else:
            po = 0
            for comic in comics:
                po += 1
                wrappers = comic[2]
                stoc = toc.add_item(href(wrappers[0]),
                        None, comic[0], play_order=po)
                if not opts.dont_add_comic_pages_to_toc:
                    for i, x in enumerate(wrappers):
                        stoc.add_item(href(x), None,
                                _('Page')+' %d'%(i+1), play_order=po)
                        po += 1
        opf.set_toc(toc)
        # Close both output files deterministically so their contents are
        # flushed to disk before the OPF path is handed back to the caller
        with open('metadata.opf', 'wb') as m:
            with open('toc.ncx', 'wb') as n:
                opf.render(m, n, 'toc.ncx')
        return os.path.abspath('metadata.opf')
    def create_wrappers(self, pages):
        '''
        Write one XHTML wrapper file per entry of `pages` and return the
        list of wrapper paths, in page order.

        The wrappers are named page_N.xhtml (N starting at 1), are created
        in the directory of the first page and reference each page image by
        its base name. `pages` must not be empty.
        '''
        from calibre.ebooks.oeb.base import XHTML_NS
        wrappers = []
        WRAPPER = textwrap.dedent('''\
        <html xmlns="%s">
            <head>
                <title>Page #%d</title>
                <style type="text/css">
                    @page { margin:0pt; padding: 0pt}
                    body { margin: 0pt; padding: 0pt}
                    div { text-align: center }
                </style>
            </head>
            <body>
                <div>
                    <img src="%s" alt="comic page #%d" />
                </div>
            </body>
        </html>
        ''')
        base = os.path.dirname(pages[0])
        for i, page in enumerate(pages):
            wrapper = WRAPPER%(XHTML_NS, i+1, os.path.basename(page), i+1)
            page = os.path.join(base, 'page_%d.xhtml'%(i+1))
            # Close each wrapper as soon as it is written rather than
            # leaving one open handle per page to the garbage collector
            with open(page, 'wb') as f:
                f.write(wrapper)
            wrappers.append(page)
        return wrappers
--- a/src/calibre/ebooks/conversion/plugins/__init__.py
+++ b/src/calibre/ebooks/conversion/plugins/__init__.py
@ -0,0 +1,11 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
--- a/src/calibre/ebooks/conversion/plugins/azw4_input.py
+++ b/src/calibre/ebooks/conversion/plugins/azw4_input.py
@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
 import os
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.pdb.header import PdbHeaderReader
 from calibre.ebooks.azw4.reader import Reader
 class AZW4Input(InputFormatPlugin):
@ -19,6 +17,9 @@ class AZW4Input(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.pdb.header import PdbHeaderReader
        from calibre.ebooks.azw4.reader import Reader
        header = PdbHeaderReader(stream)
        reader = Reader(header, stream, log, options)
        opf = reader.extract_content(os.getcwd())
--- a/src/calibre/ebooks/conversion/plugins/chm_input.py
+++ b/src/calibre/ebooks/conversion/plugins/chm_input.py
@ -3,9 +3,7 @@ __license__ = 'GPL v3'
 __copyright__  = '2008, Kovid Goyal <kovid at kovidgoyal.net>,' \
                 ' and Alex Bramley <a.bramley at gmail.com>.'
-import os, uuid
+import os
 from lxml import html
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
@ -77,7 +75,7 @@ class CHMInput(InputFormatPlugin):
    def _create_oebbook_html(self, htmlpath, basedir, opts, log, mi):
        # use HTMLInput plugin to generate book
-        from calibre.ebooks.html.input import HTMLInput
+        from calibre.customize.builtins import HTMLInput
        opts.breadth_first = True
        htmlinput = HTMLInput(None)
        oeb = htmlinput.create_oebbook(htmlpath, basedir, opts, log, mi)
@ -85,6 +83,8 @@ class CHMInput(InputFormatPlugin):
    def _create_oebbook(self, hhcpath, basedir, opts, log, mi):
        import uuid
        from lxml import html
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import DirContainer
        oeb = create_oebbook(log, None, opts,
@ -142,6 +142,7 @@ class CHMInput(InputFormatPlugin):
        return oeb
    def _create_html_root(self, hhcpath, log):
        from lxml import html
        hhcdata = self._read_file(hhcpath)
        hhcroot = html.fromstring(hhcdata)
        chapters = self._process_nodes(hhcroot)
--- a/src/calibre/ebooks/conversion/plugins/comic_input.py
+++ b/src/calibre/ebooks/conversion/plugins/comic_input.py
@ -0,0 +1,259 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 Based on ideas from comiclrf created by FangornUK.
 '''
 import shutil, textwrap, codecs, os
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre import CurrentDir
 from calibre.ptempfile import PersistentTemporaryDirectory
 class ComicInput(InputFormatPlugin):
    '''
    Input plugin for comic archives (.cbz, .cbr) and comic collections (.cbc).
    '''
    # Descriptive plugin metadata
    name        = 'Comic Input'
    author      = 'Kovid Goyal'
    description = 'Optimize comic files (.cbz, .cbr, .cbc) for viewing on portable devices'
    # File extensions handled by this plugin
    file_types  = set(['cbz', 'cbr', 'cbc'])
    # NOTE(review): presumably tells the conversion pipeline that the input is
    # made of images only -- confirm against InputFormatPlugin
    is_image_collection = True
    # NOTE(review): meaning of -1 is defined by the base class -- confirm
    core_usage = -1
    # Plugin specific options. The help text of every option describes its
    # effect; each option is read from ``opts`` under the given name.
    options = set([
        OptionRecommendation(name='colors', recommended_value=256,
            help=_('Number of colors for grayscale image conversion. Default: '
                '%default. Values of less than 256 may result in blurred text '
                'on your device if you are creating your comics in EPUB format.')),
        OptionRecommendation(name='dont_normalize', recommended_value=False,
            help=_('Disable normalize (improve contrast) color range '
            'for pictures. Default: False')),
        OptionRecommendation(name='keep_aspect_ratio', recommended_value=False,
            help=_('Maintain picture aspect ratio. Default is to fill the screen.')),
        OptionRecommendation(name='dont_sharpen', recommended_value=False,
            help=_('Disable sharpening.')),
        OptionRecommendation(name='disable_trim', recommended_value=False,
            help=_('Disable trimming of comic pages. For some comics, '
                     'trimming might remove content as well as borders.')),
        OptionRecommendation(name='landscape', recommended_value=False,
            help=_("Don't split landscape images into two portrait images")),
        OptionRecommendation(name='wide', recommended_value=False,
            help=_("Keep aspect ratio and scale image using screen height as "
            "image width for viewing in landscape mode.")),
        OptionRecommendation(name='right2left', recommended_value=False,
              help=_('Used for right-to-left publications like manga. '
              'Causes landscape pages to be split into portrait pages '
              'from right to left.')),
        OptionRecommendation(name='despeckle', recommended_value=False,
              help=_('Enable Despeckle. Reduces speckle noise. '
              'May greatly increase processing time.')),
        OptionRecommendation(name='no_sort', recommended_value=False,
              help=_("Don't sort the files found in the comic "
              "alphabetically by name. Instead use the order they were "
              "added to the comic.")),
        OptionRecommendation(name='output_format', choices=['png', 'jpg'],
            recommended_value='png', help=_('The format that images in the created ebook '
                'are converted to. You can experiment to see which format gives '
                'you optimal size and look on your device.')),
        OptionRecommendation(name='no_process', recommended_value=False,
              help=_("Apply no processing to the image")),
        OptionRecommendation(name='dont_grayscale', recommended_value=False,
            help=_('Do not convert the image to grayscale (black and white)')),
        OptionRecommendation(name='comic_image_size', recommended_value=None,
            help=_('Specify the image size as widthxheight pixels. Normally,'
                ' an image size is automatically calculated from the output '
                'profile, this option overrides it.')),
        OptionRecommendation(name='dont_add_comic_pages_to_toc', recommended_value=False,
            help=_('When converting a CBC do not add links to each page to'
                ' the TOC. Note this only applies if the TOC has more than one'
                ' section')),
        ])
    # Values this plugin recommends for general conversion options, as
    # (option name, value, recommendation level) tuples. Page images need no
    # margins, text re-flow, chapter detection or font rescaling.
    # The misspelled duplicate 'page_breaks_brefore' was dropped; the
    # correctly spelled 'page_breaks_before' entry carries the same value.
    recommendations = set([
        ('margin_left', 0, OptionRecommendation.HIGH),
        ('margin_top',  0, OptionRecommendation.HIGH),
        ('margin_right', 0, OptionRecommendation.HIGH),
        ('margin_bottom', 0, OptionRecommendation.HIGH),
        ('insert_blank_line', False, OptionRecommendation.HIGH),
        ('remove_paragraph_spacing',  False, OptionRecommendation.HIGH),
        ('change_justification', 'left', OptionRecommendation.HIGH),
        ('dont_split_on_pagebreaks', True, OptionRecommendation.HIGH),
        ('chapter', None, OptionRecommendation.HIGH),
        ('use_auto_toc', False, OptionRecommendation.HIGH),
        ('page_breaks_before', None, OptionRecommendation.HIGH),
        ('disable_font_rescaling', True, OptionRecommendation.HIGH),
        ('linearize_tables', False, OptionRecommendation.HIGH),
        ])
    def get_comics_from_collection(self, stream):
        '''
        Extract a comic collection and list the comics it contains.

        The collection is unzipped into a persistent temporary directory and
        must contain a ``comics.txt`` file. Every non-empty line of that file
        has the form ``filename:title``. A missing title defaults to the file
        name without its extension. Any ``#`` in the file name is replaced by
        ``_`` before the file is looked up. Entries whose file is not readable
        are skipped.

        :param stream: File-like object of the collection; ``stream.name`` is
            used in error messages.
        :return: List of ``[title, absolute path]`` pairs, in file order.
        :raises ValueError: If ``comics.txt`` is missing or no listed comic
            is readable.
        '''
        from calibre.libunzip import extract as zipextract
        tdir = PersistentTemporaryDirectory('_comic_collection')
        zipextract(stream, tdir)
        comics = []
        with CurrentDir(tdir):
            if not os.path.exists('comics.txt'):
                raise ValueError((
                    '%s is not a valid comic collection'
                    ' no comics.txt was found in the file')
                        %stream.name)
            with open('comics.txt', 'rb') as f:
                raw = f.read()
            # Decode according to the BOM, then drop the decoded BOM
            # character. Without a BOM the file is assumed to be UTF-8.
            for bom, encoding in (
                    (codecs.BOM_UTF16_BE, 'utf-16-be'),
                    (codecs.BOM_UTF16_LE, 'utf-16-le'),
                    (codecs.BOM_UTF8, 'utf-8')):
                if raw.startswith(bom):
                    raw = raw.decode(encoding)[1:]
                    break
            else:
                raw = raw.decode('utf-8')
            for line in raw.splitlines():
                line = line.strip()
                if not line:
                    continue
                fname, sep, title = line.partition(':')
                fname = fname.replace('#', '_')
                fname = os.path.join(tdir, *fname.split('/'))
                if not title:
                    title = os.path.basename(fname).rpartition('.')[0]
                if os.access(fname, os.R_OK):
                    comics.append([title, fname])
        if not comics:
            raise ValueError('%s has no comics'%stream.name)
        return comics
    def get_pages(self, comic, tdir2):
        '''
        Extract a comic and place its (optionally processed) pages in `tdir2`.

        With the ``no_process`` option the page images are copied unchanged.
        Otherwise they are handed to ``process_pages`` and pages that could
        not be processed are reported as warnings.

        :param comic: Path to the comic file.
        :param tdir2: Directory that receives the resulting page images.
        :return: List of paths to the page images.
        :raises ValueError: If the comic contains no pages, or no page
            survives processing.
        '''
        from calibre.ebooks.comic.input import (extract_comic,  process_pages,
                find_pages)
        tdir  = extract_comic(comic)
        new_pages = find_pages(tdir, sort_on_mtime=self.opts.no_sort,
                verbose=self.opts.verbose)
        if not new_pages:
            raise ValueError('Could not find any pages in the comic: %s'
                    %comic)
        if self.opts.no_process:
            copied = []
            for page in new_pages:
                dest = os.path.join(tdir2, os.path.basename(page))
                shutil.copyfile(page, dest)
                copied.append(dest)
            return copied
        new_pages, failures = process_pages(new_pages, self.opts,
                self.report_progress, tdir2)
        if failures:
            self.log.warning('Could not process the following pages '
            '(run with --verbose to see why):')
            for f in failures:
                self.log.warning('\t', f)
        if not new_pages:
            raise ValueError('Could not find any valid pages in comic: %s'
                    % comic)
        return new_pages
    def get_images(self):
        '''
        Return the list of page image paths stored by :meth:`convert`.
        '''
        images = self._images
        return images
    def convert(self, stream, opts, file_ext, log, accelerators):
        '''
        Convert a comic or comic collection into wrapped pages plus an OPF.

        Page images and their XHTML wrappers are created below the current
        working directory, in ``comic_<n>`` sub-directories when there is
        more than one comic. ``metadata.opf`` and ``toc.ncx`` are written to
        the current working directory. The page image paths are stored in
        ``self._images``.

        :param stream: Input file object; it is closed and ``stream.name`` is
            used as the source path and for the book title.
        :param opts: Conversion options.
        :param file_ext: Input extension; ``'cbc'`` denotes a collection.
        :param log: Logger, stored as ``self.log``.
        :param accelerators: Not used by this plugin.
        :return: Absolute path to the generated ``metadata.opf``.
        :raises ValueError: If no comic pages could be found.
        '''
        from calibre.ebooks.metadata import MetaInformation
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.toc import TOC

        self.opts, self.log= opts, log
        if file_ext == 'cbc':
            comics_ = self.get_comics_from_collection(stream)
        else:
            comics_ = [['Comic', os.path.abspath(stream.name)]]
        stream.close()
        comics = []
        for i, (title, fname) in enumerate(comics_):
            # A single comic is unpacked directly into the working directory
            cdir = 'comic_%d'%(i+1) if len(comics_) > 1 else '.'
            cdir = os.path.abspath(cdir)
            if not os.path.exists(cdir):
                os.makedirs(cdir)
            pages = self.get_pages(fname, cdir)
            if not pages: continue
            wrappers = self.create_wrappers(pages)
            comics.append((title, pages, wrappers))
        if not comics:
            raise ValueError('No comic pages found in %s'%stream.name)

        mi  = MetaInformation(os.path.basename(stream.name).rpartition('.')[0],
            [_('Unknown')])
        opf = OPFCreator(os.path.abspath('.'), mi)
        entries = []

        def href(x):
            # Files of a single comic are referenced by name only, otherwise
            # the comic_<n> directory is part of the href
            if len(comics) == 1: return os.path.basename(x)
            return '/'.join(x.split(os.sep)[-2:])

        for comic in comics:
            pages, wrappers = comic[1:]
            entries += [(w, None) for w in map(href, wrappers)] + \
                    [(x, None) for x in map(href, pages)]
        opf.create_manifest(entries)
        spine = []
        for comic in comics:
            spine.extend(map(href, comic[2]))
        self._images = []
        for comic in comics:
            self._images.extend(comic[1])
        opf.create_spine(spine)
        toc = TOC()
        if len(comics) == 1:
            wrappers = comics[0][2]
            for i, x in enumerate(wrappers):
                toc.add_item(href(x), None, _('Page')+' %d'%(i+1),
                        play_order=i)
        else:
            po = 0
            for comic in comics:
                po += 1
                wrappers = comic[2]
                stoc = toc.add_item(href(wrappers[0]),
                        None, comic[0], play_order=po)
                if not opts.dont_add_comic_pages_to_toc:
                    for i, x in enumerate(wrappers):
                        stoc.add_item(href(x), None,
                                _('Page')+' %d'%(i+1), play_order=po)
                        po += 1
        opf.set_toc(toc)
        # Close both files before returning so that their contents are
        # flushed to disk when the caller reads metadata.opf
        with open('metadata.opf', 'wb') as m:
            with open('toc.ncx', 'wb') as n:
                opf.render(m, n, 'toc.ncx')
        return os.path.abspath('metadata.opf')
    def create_wrappers(self, pages):
        '''
        Write one XHTML wrapper document for every comic page image.

        The wrappers are named ``page_<n>.xhtml`` (numbered from 1) and are
        all written to the directory that contains the first entry of
        `pages`. Each wrapper references its image by base name only.

        :param pages: Non-empty sequence of paths to the page image files.
        :return: List of paths to the generated wrapper files, in page order.
        '''
        from calibre.ebooks.oeb.base import XHTML_NS
        WRAPPER = textwrap.dedent('''\
        <html xmlns="%s">
            <head>
                <title>Page #%d</title>
                <style type="text/css">
                    @page { margin:0pt; padding: 0pt}
                    body { margin: 0pt; padding: 0pt}
                    div { text-align: center }
                </style>
            </head>
            <body>
                <div>
                    <img src="%s" alt="comic page #%d" />
                </div>
            </body>
        </html>
        ''')
        # Every wrapper goes next to the first page image
        base_dir = os.path.dirname(pages[0])
        wrappers = []
        for num, page in enumerate(pages, 1):
            wrapper = WRAPPER%(XHTML_NS, num, os.path.basename(page), num)
            path = os.path.join(base_dir, 'page_%d.xhtml'%num)
            # Close the file deterministically so the data is flushed to disk
            # before later stages read it
            with open(path, 'wb') as f:
                f.write(wrapper)
            wrappers.append(path)
        return wrappers
--- a/src/calibre/ebooks/conversion/plugins/djvu_input.py
+++ b/src/calibre/ebooks/conversion/plugins/djvu_input.py
@ -12,7 +12,6 @@ from subprocess import Popen, PIPE
 from cStringIO import StringIO
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.txt.processor import convert_basic
 class DJVUInput(InputFormatPlugin):
@ -28,6 +27,8 @@ class DJVUInput(InputFormatPlugin):
    ])
    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.ebooks.txt.processor import convert_basic
        stdout = StringIO()
        ppdjvu = True
        # using djvutxt is MUCH faster, should make it an option
--- a/src/calibre/ebooks/conversion/plugins/epub_input.py
+++ b/src/calibre/ebooks/conversion/plugins/epub_input.py
@ -3,11 +3,9 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import os, uuid
+import os
 from itertools import cycle
 from lxml import etree
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 class EPUBInput(InputFormatPlugin):
@ -30,6 +28,8 @@ class EPUBInput(InputFormatPlugin):
            f.write(raw[1024:])
    def process_encryption(self, encfile, opf, log):
        from lxml import etree
        import uuid
        key = None
        for item in opf.identifier_iter():
            scheme = None
@ -65,6 +65,7 @@ class EPUBInput(InputFormatPlugin):
        return False
    def rationalize_cover(self, opf, log):
        from lxml import etree
        guide_cover, guide_elem = None, None
        for guide_elem in opf.iterguide():
            if guide_elem.get('type', '').lower() == 'cover':
@ -110,6 +111,7 @@ class EPUBInput(InputFormatPlugin):
                    renderer)
    def find_opf(self):
        from lxml import etree
        def attr(n, attr):
            for k, v in n.attrib.items():
                if k.endswith(attr):
--- a/src/calibre/ebooks/conversion/plugins/epub_output.py
+++ b/src/calibre/ebooks/conversion/plugins/epub_output.py
@ -8,14 +8,12 @@ __docformat__ = 'restructuredtext en'
 import os, shutil, re
-from calibre.customize.conversion import OutputFormatPlugin
+from calibre.customize.conversion import (OutputFormatPlugin,
        OptionRecommendation)
 from calibre.ptempfile import TemporaryDirectory
 from calibre import CurrentDir
 from calibre.customize.conversion import OptionRecommendation
 from calibre.constants import filesystem_encoding
 from lxml import etree
 block_level_tags = (
      'address',
      'body',
@ -289,6 +287,7 @@ class EPUBOutput(OutputFormatPlugin):
    # }}}
    def condense_ncx(self, ncx_path):
        from lxml import etree
        if not self.opts.pretty_print:
            tree = etree.parse(ncx_path)
            for tag in tree.getroot().iter(tag=etree.Element):
--- a/src/calibre/ebooks/conversion/plugins/fb2_input.py
+++ b/src/calibre/ebooks/conversion/plugins/fb2_input.py
@ -6,7 +6,6 @@ Convert .fb2 files to .lrf
 """
 import os, re
 from base64 import b64decode
 from lxml import etree
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre import guess_type
@ -38,6 +37,7 @@ class FB2Input(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from lxml import etree
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.oeb.base import XLINK_NS, XHTML_NS, RECOVER_PARSER
--- a/src/calibre/ebooks/conversion/plugins/fb2_output.py
+++ b/src/calibre/ebooks/conversion/plugins/fb2_output.py
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
 import os
 from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
 from calibre.ebooks.fb2.fb2ml import FB2MLizer
 class FB2Output(OutputFormatPlugin):
@ -162,6 +161,7 @@ class FB2Output(OutputFormatPlugin):
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from calibre.ebooks.oeb.transforms.jacket import linearize_jacket
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
        from calibre.ebooks.fb2.fb2ml import FB2MLizer
        try:
            rasterizer = SVGRasterizer()
--- a/src/calibre/ebooks/conversion/plugins/html_input.py
+++ b/src/calibre/ebooks/conversion/plugins/html_input.py
@ -0,0 +1,283 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import re, tempfile, os
 from functools import partial
 from itertools import izip
 from urllib import quote
 from calibre.constants import islinux, isbsd
 from calibre.customize.conversion import (InputFormatPlugin,
        OptionRecommendation)
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename
 class HTMLInput(InputFormatPlugin):
    '''
    Input plugin that turns HTML files (following their links) or an OPF
    file into an OEB book.
    '''
    # Descriptive plugin metadata
    name        = 'HTML Input'
    author      = 'Kovid Goyal'
    description = 'Convert HTML and OPF files to an OEB'
    # File extensions handled by this plugin
    file_types  = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])
    # Plugin specific options; see the help text of each option for its effect
    options = set([
        OptionRecommendation(name='breadth_first',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Traverse links in HTML files breadth first. Normally, '
                    'they are traversed depth first.'
                   )
        ),
        OptionRecommendation(name='max_levels',
            recommended_value=5, level=OptionRecommendation.LOW,
            help=_('Maximum levels of recursion when following links in '
                   'HTML files. Must be non-negative. 0 implies that no '
                   'links in the root HTML file are followed. Default is '
                   '%default.'
                   )
        ),
        OptionRecommendation(name='dont_package',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Normally this input plugin re-arranges all the input '
                'files into a standard folder hierarchy. Only use this option '
                'if you know what you are doing as it can result in various '
                'nasty side effects in the rest of the conversion pipeline.'
                )
        ),
    ])
    def convert(self, stream, opts, file_ext, log,
                accelerators):
        '''
        Create an OEB book from an OPF file or from an HTML file.

        An OPF input is passed straight to the plumber's ``create_oebbook``.
        For HTML input the metadata read from the file is merged into the
        metadata guessed from the file name (when the stream has a name) and
        the book is assembled by :meth:`create_oebbook`.

        :param stream: Input file object.
        :param opts: Conversion options, stored as ``self.opts``.
        :param file_ext: Extension of the input file.
        :param log: Logger.
        :param accelerators: Not used by this plugin.
        :return: The OEB book.
        :raises ValueError: If ``dont_package`` is set for an HTML input file.
        '''
        # Reset the cached result of is_case_sensitive()
        self._is_case_sensitive = None
        basedir = os.getcwd()
        self.opts = opts
        fname = None
        if hasattr(stream, 'name'):
            basedir = os.path.dirname(stream.name)
            fname = os.path.basename(stream.name)

        if file_ext == 'opf':
            from calibre.ebooks.conversion.plumber import create_oebbook
            return create_oebbook(log, stream.name, opts,
                    encoding=opts.input_encoding)

        if opts.dont_package:
            raise ValueError('The --dont-package option is not supported for an HTML input file')
        from calibre.ebooks.metadata.html import get_metadata
        mi = get_metadata(stream)
        if fname:
            from calibre.ebooks.metadata.meta import metadata_from_filename
            # Start from the file name metadata and update it with the
            # metadata found in the file itself
            name_mi = metadata_from_filename(fname)
            name_mi.smart_update(mi)
            mi = name_mi
        return self.create_oebbook(stream.name, basedir, opts, log, mi)
    def is_case_sensitive(self, path):
        '''
        Report whether the filesystem holding `path` is case sensitive.

        The result of the first successful probe is cached on the instance
        and returned for all later calls. If `path` is empty or does not
        exist, no probe is made and ``islinux or isbsd`` is returned without
        being cached.
        '''
        cached = getattr(self, '_is_case_sensitive', None)
        if cached is not None:
            return cached
        if not (path and os.path.exists(path)):
            return islinux or isbsd
        # If both the lower and the upper cased spelling exist, the
        # filesystem is taken to ignore case
        both_spellings_exist = os.path.exists(path.lower()) \
                and os.path.exists(path.upper())
        self._is_case_sensitive = not both_spellings_exist
        return self._is_case_sensitive
    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        '''
        Build an OEB book from an HTML file and the files it links to.

        The HTML files returned by ``get_filelist`` are added to the manifest
        and spine, links in the HTML and CSS are rewritten via
        :meth:`resource_adder` (which adds the linked resources to the
        manifest) and a table of contents is generated from the document
        titles or first headings.

        :param htmlpath: Path to the root HTML file.
        :param basedir: Base directory passed to ``get_filelist``.
        :param opts: Conversion options.
        :param log: Logger, stored as ``self.log``.
        :param mi: Metadata that is copied into the book.
        :return: The OEB book, also stored as ``self.oeb``.
        '''
        import uuid
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import (DirContainer,
            rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
            xpath)
        from calibre import guess_type
        from calibre.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        from calibre.ebooks.html.input import get_filelist
        import cssutils, logging
        cssutils.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        # Create an empty book (populate=False); it is filled in below
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb
        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        # Fill in defaults for missing language, creator and title
        if not metadata.language:
            oeb.logger.warn(u'Language not specified')
            metadata.add('language', get_lang().replace('_', '-'))
        if not metadata.creator:
            oeb.logger.warn('Creator not specified')
            metadata.add('creator', self.oeb.translate(__('Unknown')))
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate(__('Unknown')))
        # Always add a freshly generated UUID identifier
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        # NOTE(review): the loop looks for an identifier that has an id
        # attribute but then assigns identifier[0] rather than `ident` --
        # confirm this is intended
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                self.oeb.uid = metadata.identifier[0]
                break
        # Only the non binary files are added as HTML documents
        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        # Maps the path of every HTML file to its href in the manifest
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            # Point the container at the directory of the file being added
            oeb.container = DirContainer(os.path.dirname(path), log,
                    ignore_opf=True)
            bname = os.path.basename(path)
            id, href = oeb.manifest.generate(id='html',
                    href=ascii_filename(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(id, href, 'text/html')
            item.html_input_href = bname
            oeb.spine.add(item, True)
        # Maps local paths to manifest hrefs, also used by resource_adder()
        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        # Keep references to the locally imported names for use in
        # resource_adder()
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            item = oeb.manifest.hrefs[htmlfile_map[path]]
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))
        # Rewrite the URLs inside the stylesheets, relative to the directory
        # the stylesheet was added from
        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                cssutils.replaceUrls(item.data,
                        partial(self.resource_adder, base=dpath))
        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        titles = []
        headers = []
        for item in self.oeb.spine:
            if not item.linear: continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            if title:
                titles.append(title)
            # Placeholder, replaced by the text of the first heading found
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = re.sub(r'\s+', ' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        # Use the headings when the document titles are not unique
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        # NOTE(review): `titles` only has entries for linear items with a
        # non-empty title, but is paired here with every spine item, so the
        # labels may be attached to the wrong items -- confirm
        for title, item in izip(use, self.oeb.spine):
            if not item.linear: continue
            toc.add(title, item.href)
        oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
        return oeb
    def link_to_local_path(self, link_, base=None):
        '''
        Resolve a link to a path on the local filesystem.

        :param link_: The link, either unicode or a UTF-8 encoded byte string.
        :param base: Directory the link is resolved against; defaults to the
            current working directory.
        :return: ``(path, fragment)``, or ``(None, None)`` if the link cannot
            be decoded or processed, does not refer to a local resource, or
            has an empty path.
        '''
        from calibre.ebooks.html.input import Link
        if not isinstance(link_, unicode):
            try:
                # 'strict' is the codec error handler that raises on invalid
                # input; 'error' is not a registered handler name
                link_ = link_.decode('utf-8', 'strict')
            except Exception:
                self.log.warn('Failed to decode link %r. Ignoring'%link_)
                return None, None
        try:
            l = Link(link_, base if base else os.getcwdu())
        except Exception:
            self.log.exception('Failed to process link: %r'%link_)
            return None, None
        if l.path is None:
            # Not a local resource
            return None, None
        link = l.path.replace('/', os.sep).strip()
        frag = l.fragment
        if not link:
            return None, None
        return link, frag
    def resource_adder(self, link_, base=None):
        '''
        Link rewriting callback that adds linked local files to the manifest.

        :param link_: The link as found in the document.
        :param base: Directory that relative links are resolved against.
        :return: The manifest href of the resource (with the fragment of the
            original link appended, if any). `link_` is returned unchanged if
            it does not point to a readable local file. ``None`` is returned
            for links to plain text files.
        '''
        link, frag = self.link_to_local_path(link_, base=base)
        if link is None:
            return link_
        try:
            if base and not os.path.isabs(link):
                link = os.path.join(base, link)
            link = os.path.abspath(link)
        except Exception:
            # Best effort: leave links that cannot be resolved untouched
            return link_
        if not os.access(link, os.R_OK):
            return link_
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
        if not self.is_case_sensitive(tempfile.gettempdir()):
            link = link.lower()
        if link not in self.added_resources:
            bhref = os.path.basename(link)
            item_id, href = self.oeb.manifest.generate(id='added',
                    href=bhref)
            guessed = self.guess_type(href)[0]
            media_type = guessed or self.BINARY_MIME
            if media_type == 'text/plain':
                self.log.warn('Ignoring link to text file %r'%link_)
                return None

            self.oeb.log.debug('Added', link)
            # The container must point at the directory of the resource
            # before the item is added and its data is loaded
            self.oeb.container = self.DirContainer(os.path.dirname(link),
                    self.oeb.log, ignore_opf=True)
            # Load into memory
            item = self.oeb.manifest.add(item_id, href, media_type)
            # bhref refers to an already existing file. The read() method of
            # DirContainer will call unquote on it before trying to read the
            # file, therefore we quote it here.
            if isinstance(bhref, unicode):
                bhref = bhref.encode('utf-8')
            item.html_input_href = quote(bhref).decode('utf-8')
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = partial(
                        self.css_import_handler, os.path.dirname(link))
            # Accessing the attribute triggers loading of the data
            item.data
            self.added_resources[link] = href

        nlink = self.added_resources[link]
        if frag:
            nlink = '#'.join((nlink, frag))
        return nlink
    def css_import_handler(self, base, href):
        '''
        Fetch the contents of a locally imported stylesheet.

        :param base: Directory that `href` is resolved against.
        :param href: The link to the imported stylesheet.
        :return: ``(None, css text)`` with the text decoded as UTF-8 and run
            through the book's CSS preprocessor, or ``(None, None)`` if the
            link is not a readable local file or reading it fails.
        '''
        link, frag = self.link_to_local_path(href, base=base)
        if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
            return (None, None)
        try:
            with open(link, 'rb') as f:
                raw = f.read().decode('utf-8', 'replace')
            raw = self.oeb.css_preprocessor(raw, add_namespace=True)
        except Exception:
            self.log.exception('Failed to read CSS file: %r'%link)
            return (None, None)
        return (None, raw)
--- a/src/calibre/ebooks/conversion/plugins/html_output.py
+++ b/src/calibre/ebooks/conversion/plugins/html_output.py
@ -4,22 +4,11 @@ __copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
 __docformat__ = 'restructuredtext en'
 import os, re, shutil
 from calibre.utils import zipfile
 from os.path import dirname, abspath, relpath, exists, basename
 from lxml import etree
 from templite import Templite
 from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
 from calibre import CurrentDir
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.utils.zipfile import ZipFile
 from urllib import unquote
 from calibre.ebooks.html.meta import EasyMeta
 class HTMLOutput(OutputFormatPlugin):
@ -50,6 +39,9 @@ class HTMLOutput(OutputFormatPlugin):
        '''
        Generate table of contents
        '''
        from lxml import etree
        from urllib import unquote
        from calibre.ebooks.oeb.base import element
        with CurrentDir(output_dir):
            def build_node(current_node, parent=None):
@ -72,11 +64,18 @@ class HTMLOutput(OutputFormatPlugin):
            return wrap
    def generate_html_toc(self, oeb_book, ref_url, output_dir):
        from lxml import etree
        root = self.generate_toc(oeb_book, ref_url, output_dir)
        return etree.tostring(root, pretty_print=True, encoding='utf-8',
                xml_declaration=False)
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from lxml import etree
        from calibre.utils import zipfile
        from templite import Templite
        from urllib import unquote
        from calibre.ebooks.html.meta import EasyMeta
        # read template files
        if opts.template_html_index is not None:
@ -192,7 +191,7 @@ class HTMLOutput(OutputFormatPlugin):
                    f.write(t)
                item.unload_data_from_memory(memory=path)
-        zfile = ZipFile(output_path, "w")
+        zfile = zipfile.ZipFile(output_path, "w")
        zfile.add_dir(output_dir, basename(output_dir))
        zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)
--- a/src/calibre/ebooks/conversion/plugins/htmlz_input.py
+++ b/src/calibre/ebooks/conversion/plugins/htmlz_input.py
@ -10,9 +10,6 @@ import os
 from calibre import guess_type
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.utils.zipfile import ZipFile
 class HTMLZInput(InputFormatPlugin):
@ -23,6 +20,10 @@ class HTMLZInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.chardet import xml_to_unicode
        from calibre.ebooks.metadata.opf2 import OPF
        from calibre.utils.zipfile import ZipFile
        self.log = log
        html = u''
        top_levels = []
--- a/src/calibre/ebooks/conversion/plugins/htmlz_output.py
+++ b/src/calibre/ebooks/conversion/plugins/htmlz_output.py
@ -9,13 +9,10 @@ __docformat__ = 'restructuredtext en'
 import os
 from cStringIO import StringIO
 from lxml import etree
 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
 from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
 class HTMLZOutput(OutputFormatPlugin):
@ -43,7 +40,10 @@ class HTMLZOutput(OutputFormatPlugin):
    ])
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from lxml import etree
        from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
        from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
        from calibre.utils.zipfile import ZipFile
        # HTML
        if opts.htmlz_css_type == 'inline':
--- a/src/calibre/ebooks/conversion/plugins/lit_input.py
+++ b/src/calibre/ebooks/conversion/plugins/lit_input.py
--- a/src/calibre/ebooks/conversion/plugins/lit_output.py
+++ b/src/calibre/ebooks/conversion/plugins/lit_output.py
--- a/src/calibre/ebooks/conversion/plugins/lrf_input.py
+++ b/src/calibre/ebooks/conversion/plugins/lrf_input.py
@ -0,0 +1,87 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import os, sys
 from calibre.customize.conversion import InputFormatPlugin
 class LRFInput(InputFormatPlugin):
    '''
    Input plugin that converts LRF files by serialising them to XML and
    running that XML through the templates/lrf.xsl stylesheet.
    '''

    name        = 'LRF Input'
    author      = 'Kovid Goyal'
    description = 'Convert LRF files to HTML'
    file_types  = set(['lrf'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the LRF document in ``stream``.

        The document is parsed with LRFDocument, dumped to XML and then
        transformed with an XSLT stylesheet whose extension functions are
        provided by the helper objects from calibre.ebooks.lrf.input.
        The transform result is written to ``content.opf`` in the current
        working directory.

        :param stream: Open LRF file, passed straight to LRFDocument.
        :param options: Conversion options; only ``verbose`` is read here.
        :param log: Logger, stored on ``self.log``.
        :return: Absolute path of the written ``content.opf``.
        '''
        from lxml import etree
        from calibre.ebooks.lrf.input import (MediaType, Styles, TextBlock,
                Canvas, ImageBlock, RuledLine)
        from calibre.ebooks.lrf.lrfparser import LRFDocument

        self.log = log
        self.log('Generating XML')
        d = LRFDocument(stream)
        d.parse()
        xml = d.to_xml(write_files=True)
        if options.verbose > 2:
            # Debug dump of the intermediate XML; use a context manager so
            # the handle is flushed and closed deterministically.
            with open('lrs.xml', 'wb') as f:
                f.write(xml.encode('utf-8'))
        parser = etree.XMLParser(no_network=True, huge_tree=True)
        try:
            doc = etree.fromstring(xml, parser=parser)
        except Exception:
            # Any parse failure falls back to lxml's recovering parser.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            self.log.warn('Failed to parse XML. Trying to recover')
            parser = etree.XMLParser(no_network=True, huge_tree=True,
                    recover=True)
            doc = etree.fromstring(xml, parser=parser)

        # Map the refobj of every CharButton to the '<page>.xhtml#<obj>'
        # target of the JumpTo found inside the referenced object.
        char_button_map = {}
        for x in doc.xpath('//CharButton[@refobj]'):
            ro = x.get('refobj')
            jump_button = doc.xpath('//*[@objid="%s"]'%ro)
            if jump_button:
                jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
                if jump_to:
                    char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
                            jump_to[0].get('refobj'))

        # Map the refobj of every Plot to the file of the ImageStream that
        # its Image refers to.
        plot_map = {}
        for x in doc.xpath('//Plot[@refobj]'):
            ro = x.get('refobj')
            image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
            if image:
                imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
                    image[0].get('refstream'))
                if imgstr:
                    plot_map[ro] = imgstr[0].get('file')

        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
        media_type = MediaType()
        styles = Styles()
        text_block = TextBlock(styles, char_button_map, plot_map, log)
        canvas = Canvas(doc, styles, text_block, log)
        image_block = ImageBlock(canvas)
        ruled_line = RuledLine()
        extensions = {
                ('calibre', 'media-type') : media_type,
                ('calibre', 'text-block') : text_block,
                ('calibre', 'ruled-line') : ruled_line,
                ('calibre', 'styles')     : styles,
                ('calibre', 'canvas')     : canvas,
                ('calibre', 'image-block'): image_block,
                }
        transform = etree.XSLT(styledoc, extensions=extensions)
        try:
            result = transform(doc)
        except RuntimeError:
            # Retry once with a higher recursion limit. Note that the new
            # limit stays in effect for the rest of the process.
            sys.setrecursionlimit(5000)
            result = transform(doc)

        with open('content.opf', 'wb') as f:
            f.write(result)
        styles.write()
        return os.path.abspath('content.opf')
--- a/src/calibre/ebooks/conversion/plugins/lrf_output.py
+++ b/src/calibre/ebooks/conversion/plugins/lrf_output.py
--- a/src/calibre/ebooks/conversion/plugins/mobi_input.py
+++ b/src/calibre/ebooks/conversion/plugins/mobi_input.py
--- a/src/calibre/ebooks/conversion/plugins/mobi_output.py
+++ b/src/calibre/ebooks/conversion/plugins/mobi_output.py
--- a/src/calibre/ebooks/conversion/plugins/odt_input.py
+++ b/src/calibre/ebooks/conversion/plugins/odt_input.py
@ -0,0 +1,25 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 '''
 Convert an ODT file into an Open Ebook
 '''
 from calibre.customize.conversion import InputFormatPlugin
 class ODTInput(InputFormatPlugin):
    '''
    Input plugin for ODT files; the actual work is delegated to
    calibre.ebooks.odt.input.Extract.
    '''

    name        = 'ODT Input'
    author      = 'Kovid Goyal'
    description = 'Convert ODT (OpenOffice) files to HTML'
    file_types  = set(['odt'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Run the ODT extractor on ``stream`` with '.' as its second
        argument and return whatever the extractor returns.
        '''
        # Imported lazily so that loading the plugin does not pull in the
        # ODT machinery.
        from calibre.ebooks.odt.input import Extract
        extractor = Extract()
        return extractor(stream, '.', log)
--- a/src/calibre/ebooks/conversion/plugins/oeb_output.py
+++ b/src/calibre/ebooks/conversion/plugins/oeb_output.py
@ -5,13 +5,10 @@ __docformat__ = 'restructuredtext en'
 import os, re
 from lxml import etree
-from calibre.customize.conversion import OutputFormatPlugin
+from calibre.customize.conversion import (OutputFormatPlugin,
        OptionRecommendation)
 from calibre import CurrentDir
 from calibre.customize.conversion import OptionRecommendation
 from urllib import unquote
 class OEBOutput(OutputFormatPlugin):
@ -23,6 +20,9 @@ class OEBOutput(OutputFormatPlugin):
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from urllib import unquote
        from lxml import etree
        self.log, self.opts = log, opts
        if not os.path.exists(output_path):
            os.makedirs(output_path)
--- a/src/calibre/ebooks/conversion/plugins/pdb_input.py
+++ b/src/calibre/ebooks/conversion/plugins/pdb_input.py
@ -7,8 +7,6 @@ __docformat__ = 'restructuredtext en'
 import os
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.pdb.header import PdbHeaderReader
 from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
 class PDBInput(InputFormatPlugin):
@ -19,6 +17,9 @@ class PDBInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.pdb.header import PdbHeaderReader
        from calibre.ebooks.pdb import PDBError, IDENTITY_TO_NAME, get_reader
        header = PdbHeaderReader(stream)
        Reader = get_reader(header.ident)
--- a/src/calibre/ebooks/conversion/plugins/pdb_output.py
+++ b/src/calibre/ebooks/conversion/plugins/pdb_output.py
@ -8,7 +8,7 @@ import os
 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
-from calibre.ebooks.pdb import PDBError, get_writer, FORMAT_WRITERS
+from calibre.ebooks.pdb import PDBError, get_writer, ALL_FORMAT_WRITERS
 class PDBOutput(OutputFormatPlugin):
@ -19,9 +19,9 @@ class PDBOutput(OutputFormatPlugin):
    options = set([
        OptionRecommendation(name='format', recommended_value='doc',
            level=OptionRecommendation.LOW,
-            short_switch='f', choices=FORMAT_WRITERS.keys(),
+            short_switch='f', choices=list(ALL_FORMAT_WRITERS),
            help=(_('Format to use inside the pdb container. Choices are:')+\
-            ' %s' % FORMAT_WRITERS.keys())),
+            ' %s' % list(ALL_FORMAT_WRITERS))),
        OptionRecommendation(name='pdb_output_encoding', recommended_value='cp1252',
            level=OptionRecommendation.LOW,
            help=_('Specify the character encoding of the output document. ' \
--- a/src/calibre/ebooks/conversion/plugins/pdf_input.py
+++ b/src/calibre/ebooks/conversion/plugins/pdf_input.py
@ -7,10 +7,6 @@ __docformat__ = 'restructuredtext en'
 import os
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.pdf.pdftohtml import pdftohtml
 from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.constants import plugins
 pdfreflow, pdfreflow_err = plugins['pdfreflow']
 class PDFInput(InputFormatPlugin):
@ -31,6 +27,9 @@ class PDFInput(InputFormatPlugin):
    ])
    def convert_new(self, stream, accelerators):
        from calibre.constants import plugins
        pdfreflow, pdfreflow_err = plugins['pdfreflow']
        from calibre.ebooks.pdf.reflow import PDFDocument
        from calibre.utils.cleantext import clean_ascii_chars
        if pdfreflow_err:
@ -43,6 +42,9 @@ class PDFInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.pdf.pdftohtml import pdftohtml
        log.debug('Converting file to html...')
        # The main html file will be named index.html
        self.opts, self.log = options, log
--- a/src/calibre/ebooks/conversion/plugins/pdf_output.py
+++ b/src/calibre/ebooks/conversion/plugins/pdf_output.py
@ -13,10 +13,50 @@ import os
 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
 from calibre.ebooks.metadata.opf2 import OPF
 from calibre.ptempfile import TemporaryDirectory
-from calibre.ebooks.pdf.pageoptions import UNITS, PAPER_SIZES, \
+
-    ORIENTATIONS
+UNITS = [
            'millimeter',
            'point',
            'inch' ,
            'pica' ,
            'didot',
            'cicero',
            'devicepixel',
        ]
 PAPER_SIZES = ['b2',
     'a9',
     'executive',
     'tabloid',
     'b4',
     'b5',
     'b6',
     'b7',
     'b0',
     'b1',
     'letter',
     'b3',
     'a7',
     'a8',
     'b8',
     'b9',
     'a3',
     'a1',
     'folio',
     'c5e',
     'dle',
     'a0',
     'ledger',
     'legal',
     'a6',
     'a2',
     'b10',
     'a5',
     'comm10e',
     'a4']
 ORIENTATIONS = ['portrait', 'landscape']
 class PDFOutput(OutputFormatPlugin):
@ -26,23 +66,23 @@ class PDFOutput(OutputFormatPlugin):
    options = set([
        OptionRecommendation(name='unit', recommended_value='inch',
-            level=OptionRecommendation.LOW, short_switch='u', choices=UNITS.keys(),
+            level=OptionRecommendation.LOW, short_switch='u', choices=UNITS,
            help=_('The unit of measure. Default is inch. Choices '
            'are %s '
-            'Note: This does not override the unit for margins!') % UNITS.keys()),
+            'Note: This does not override the unit for margins!') % UNITS),
        OptionRecommendation(name='paper_size', recommended_value='letter',
-            level=OptionRecommendation.LOW, choices=PAPER_SIZES.keys(),
+            level=OptionRecommendation.LOW, choices=PAPER_SIZES,
            help=_('The size of the paper. This size will be overridden when a '
            'non default output profile is used. Default is letter. Choices '
-            'are %s') % PAPER_SIZES.keys()),
+            'are %s') % PAPER_SIZES),
        OptionRecommendation(name='custom_size', recommended_value=None,
            help=_('Custom size of the document. Use the form widthxheight '
            'EG. `123x321` to specify the width and height. '
            'This overrides any specified paper-size.')),
        OptionRecommendation(name='orientation', recommended_value='portrait',
-            level=OptionRecommendation.LOW, choices=ORIENTATIONS.keys(),
+            level=OptionRecommendation.LOW, choices=ORIENTATIONS,
            help=_('The orientation of the page. Default is portrait. Choices '
-            'are %s') % ORIENTATIONS.keys()),
+            'are %s') % ORIENTATIONS),
        OptionRecommendation(name='preserve_cover_aspect_ratio',
            recommended_value=False,
            help=_('Preserve the aspect ratio of the cover, instead'
@ -105,6 +145,8 @@ class PDFOutput(OutputFormatPlugin):
    def convert_text(self, oeb_book):
        from calibre.ebooks.pdf.writer import PDFWriter
        from calibre.ebooks.metadata.opf2 import OPF
        self.log.debug('Serializing oeb input to disk for processing...')
        self.get_cover_data()
--- a/src/calibre/ebooks/conversion/plugins/pml_input.py
+++ b/src/calibre/ebooks/conversion/plugins/pml_input.py
@ -11,9 +11,6 @@ import shutil
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
 from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
 from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.metadata.opf2 import OPFCreator
 class PMLInput(InputFormatPlugin):
@ -24,6 +21,8 @@ class PMLInput(InputFormatPlugin):
    file_types  = set(['pml', 'pmlz'])
    def process_pml(self, pml_path, html_path, close_all=False):
        from calibre.ebooks.pml.pmlconverter import PML_HTMLizer
        pclose = False
        hclose = False
@ -85,6 +84,9 @@ class PMLInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.metadata.toc import TOC
        from calibre.ebooks.metadata.opf2 import OPFCreator
        self.options = options
        self.log = log
        pages, images = [], []
--- a/src/calibre/ebooks/conversion/plugins/pml_output.py
+++ b/src/calibre/ebooks/conversion/plugins/pml_output.py
@ -4,21 +4,11 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
-import os
+import os, cStringIO
-try:
+from calibre.customize.conversion import (OutputFormatPlugin,
-    from PIL import Image
+        OptionRecommendation)
    Image
 except ImportError:
    import Image
 import cStringIO
 from calibre.customize.conversion import OutputFormatPlugin
 from calibre.customize.conversion import OptionRecommendation
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
 from calibre.ebooks.pml.pmlml import PMLMLizer
 class PMLOutput(OutputFormatPlugin):
@ -43,6 +33,9 @@ class PMLOutput(OutputFormatPlugin):
    ])
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from calibre.ebooks.pml.pmlml import PMLMLizer
        from calibre.utils.zipfile import ZipFile
        with TemporaryDirectory('_pmlz_output') as tdir:
            pmlmlizer = PMLMLizer(log)
            pml = unicode(pmlmlizer.extract_content(oeb_book, opts))
@ -59,6 +52,13 @@ class PMLOutput(OutputFormatPlugin):
            pmlz.add_dir(tdir)
    def write_images(self, manifest, image_hrefs, out_dir, opts):
        try:
            from PIL import Image
            Image
        except ImportError:
            import Image
        from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
        for item in manifest:
            if item.media_type in OEB_RASTER_IMAGES and item.href in image_hrefs.keys():
--- a/src/calibre/ebooks/conversion/plugins/rb_input.py
+++ b/src/calibre/ebooks/conversion/plugins/rb_input.py
@ -6,7 +6,6 @@ __docformat__ = 'restructuredtext en'
 import os
 from calibre.ebooks.rb.reader import Reader
 from calibre.customize.conversion import InputFormatPlugin
 class RBInput(InputFormatPlugin):
@ -18,6 +17,8 @@ class RBInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.rb.reader import Reader
        reader = Reader(stream, log, options.input_encoding)
        opf = reader.extract_content(os.getcwd())
--- a/src/calibre/ebooks/conversion/plugins/rb_output.py
+++ b/src/calibre/ebooks/conversion/plugins/rb_output.py
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
 import os
 from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
 from calibre.ebooks.rb.writer import RBWriter
 class RBOutput(OutputFormatPlugin):
@ -22,6 +21,8 @@ class RBOutput(OutputFormatPlugin):
    ])
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from calibre.ebooks.rb.writer import RBWriter
        close = False
        if not hasattr(output_path, 'write'):
            close = True
--- a/src/calibre/ebooks/conversion/plugins/recipe_input.py
+++ b/src/calibre/ebooks/conversion/plugins/recipe_input.py
--- a/src/calibre/ebooks/conversion/plugins/rtf_input.py
+++ b/src/calibre/ebooks/conversion/plugins/rtf_input.py
@ -0,0 +1,298 @@
 from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import os, glob, re, textwrap
 from calibre.customize.conversion import InputFormatPlugin
 # Maps the value of a cell's ``border-cell-<side>-style`` attribute to the
 # CSS ``border-style`` keyword used for it. RTFInput.convert_borders looks
 # names up here and falls back to 'solid' for anything not listed.
 # NOTE(review): both 'tripple' and 'triple' are present; presumably the
 # misspelt form is what rtf2xml can emit -- confirm before removing either.
 border_style_map = {
        'single' : 'solid',
        'double-thickness-border' : 'double',
        'shadowed-border': 'outset',
        'double-border': 'double',
        'dotted-border': 'dotted',
        'dashed': 'dashed',
        'hairline': 'solid',
        'inset': 'inset',
        'dash-small': 'dashed',
        'dot-dash': 'dotted',
        'dot-dot-dash': 'dotted',
        'outset': 'outset',
        'tripple': 'double',
        'triple': 'double',
        'thick-thin-small': 'solid',
        'thin-thick-small': 'solid',
        'thin-thick-thin-small': 'solid',
        'thick-thin-medium': 'solid',
        'thin-thick-medium': 'solid',
        'thin-thick-thin-medium': 'solid',
        'thick-thin-large': 'solid',
        'thin-thick-thin-large': 'solid',
        'wavy': 'ridge',
        'double-wavy': 'ridge',
        'striped': 'ridge',
        'emboss': 'inset',
        'engrave': 'inset',
        'frame': 'ridge',
 }
 class RTFInput(InputFormatPlugin):
    '''
    Input plugin that converts RTF files to XHTML.

    The RTF is turned into XML by rtf2xml (ParseRtf), pictures found in
    the generated picts file are written out as image files, and the XML
    is transformed with templates/rtf.xsl into ``index.xhtml``. All
    output files are written relative to the current working directory.
    '''

    name        = 'RTF Input'
    author      = 'Kovid Goyal'
    description = 'Convert RTF files to HTML'
    file_types  = set(['rtf'])

    def generate_xml(self, stream):
        '''
        Run ParseRtf and return the contents of the XML file it writes.

        :param stream: Passed to ParseRtf as ``in_file``; convert() hands
            in the name of the input file.
        :return: Raw contents of ``dataxml.xml``.

        If ``self.opts.debug_pipeline`` is set, the parser is run at run
        level 4 with indented output and ``rtfdebug`` as its debug
        directory, provided that directory can be created.
        '''
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
        ofile = 'dataxml.xml'
        run_lev, debug_dir, indent_out = 1, None, 0
        if getattr(self.opts, 'debug_pipeline', None) is not None:
            try:
                os.mkdir('rtfdebug')
                debug_dir = 'rtfdebug'
                run_lev = 4
                indent_out = 1
                self.log('Running RTFParser in debug mode')
            except Exception:
                # Debug mode is best effort: fall back to a normal run.
                self.log.warn('Impossible to run RTFParser in debug mode')
        parser = ParseRtf(
            in_file    = stream,
            out_file   = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,

            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,

            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,

            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,

            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = indent_out,

            # Form lists from RTF. Default is 1.
            form_lists = 1,

            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,

            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,

            # Group borders. Default is 1.
            group_borders = 1,

            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 1,

            # Debug settings
            deb_dir = debug_dir,
            run_level = run_lev,
        )
        parser.parse_rtf()
        with open(ofile, 'rb') as f:
            return f.read()

    def extract_images(self, picts):
        '''
        Write every ``{\\pict ...}`` group found in the file ``picts`` to
        its own image file and return the result of convert_images().

        Each group is reduced to its hex digits and decoded; the image
        type is sniffed with imghdr and defaults to 'wmf' when unknown.
        Files are named ``NNNN.<fmt>`` using a 1-based counter, and the
        returned dict maps that counter to the file name.
        '''
        import imghdr
        self.log('Extracting images...')

        with open(picts, 'rb') as f:
            raw = f.read()
        picts = filter(len, re.findall(r'\{\\pict([^}]+)\}', raw))
        # Named non_hex rather than hex so the builtin is not shadowed.
        non_hex = re.compile(r'[^a-fA-F0-9]')
        encs = [non_hex.sub('', pict) for pict in picts]

        count = 0
        imap = {}
        for enc in encs:
            if len(enc) % 2 == 1:
                # Hex decoding needs an even number of digits.
                enc = enc[:-1]
            data = enc.decode('hex')
            fmt = imghdr.what(None, data)
            if fmt is None:
                fmt = 'wmf'
            count += 1
            name = '%04d.%s' % (count, fmt)
            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name
        return self.convert_images(imap)

    def convert_images(self, imap):
        '''
        Replace every file name in ``imap`` with the result of
        convert_image(). Failures are logged and leave the entry as is.
        ``imap`` is modified in place and also returned.
        '''
        # Reset the cached placeholder image used by replace_wmf().
        self.default_img = None
        for count, val in imap.iteritems():
            try:
                imap[count] = self.convert_image(val)
            except Exception:
                self.log.exception('Failed to convert', val)
        return imap

    def convert_image(self, name):
        '''
        Return ``name`` unchanged unless it ends with '.wmf'. WMF files
        are rasterized; if that fails the error is logged and a
        placeholder image is substituted instead.
        '''
        if not name.endswith('.wmf'):
            return name
        try:
            return self.rasterize_wmf(name)
        except Exception:
            self.log.exception('Failed to convert WMF image %r'%name)
        return self.replace_wmf(name)

    def replace_wmf(self, name):
        '''
        Write a placeholder image under ``name`` with '.wmf' replaced by
        '.jpg' and return the new name. The placeholder is generated once
        with calibre_cover() and cached on ``self.default_img``.
        '''
        from calibre.ebooks import calibre_cover
        if self.default_img is None:
            self.default_img = calibre_cover('Conversion of WMF images is not supported',
            'Use Microsoft Word or OpenOffice to save this RTF file'
            ' as HTML and convert that in calibre.', title_size=36,
            author_size=20)
        name = name.replace('.wmf', '.jpg')
        with open(name, 'wb') as f:
            f.write(self.default_img)
        return name

    def rasterize_wmf(self, name):
        '''
        Pass the contents of the file ``name`` through wmf_unwrap() and
        write the result under ``name`` with '.wmf' replaced by '.png'.
        Returns the new name.
        '''
        from calibre.utils.wmf.parse import wmf_unwrap
        with open(name, 'rb') as f:
            data = f.read()
        data = wmf_unwrap(data)
        name = name.replace('.wmf', '.png')
        with open(name, 'wb') as f:
            f.write(data)
        return name

    def write_inline_css(self, ic, border_styles):
        '''
        Append the CSS for inline formatting to ``styles.css``.

        :param ic: Object whose ``font_sizes`` and ``colors`` sequences
            yield the ``span.fsN`` and ``span.colN`` rules.
        :param border_styles: Dict of class name to CSS declarations, as
            returned by convert_borders().
        '''
        font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
                enumerate(ic.font_sizes)]
        color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
                enumerate(ic.colors)]
        css = textwrap.dedent('''
        span.none {
            text-decoration: none; font-weight: normal;
            font-style: normal; font-variant: normal
        }

        span.italics { font-style: italic }

        span.bold { font-weight: bold }

        span.small-caps { font-variant: small-caps }

        span.underlined { text-decoration: underline }

        span.strike-through { text-decoration: line-through }

        ''')
        css += '\n'+'\n'.join(font_size_classes)
        css += '\n' +'\n'.join(color_classes)

        for cls, val in border_styles.iteritems():
            css += '\n\n.%s {\n%s\n}'%(cls, val)

        # Opened in append mode, so existing content is kept.
        with open('styles.css', 'ab') as f:
            f.write(css)

    def convert_borders(self, doc):
        '''
        Give every ``cell`` element in ``doc`` a ``class`` attribute that
        describes its borders.

        Cells with identical border declarations share one class named
        ``border_styleN``, numbered in order of first appearance.

        :return: Dict mapping each class name to its CSS declarations.
        '''
        style_map = {}
        # Declarations -> class name, so repeated styles are found
        # without scanning a list for every cell.
        class_for_style = {}
        for elem in doc.xpath(r'//*[local-name()="cell"]'):
            style = ['border-style: hidden', 'border-width: 1px',
                    'border-color: black']
            for x in ('bottom', 'top', 'left', 'right'):
                bs = elem.get('border-cell-%s-style'%x, None)
                if bs:
                    cbs = border_style_map.get(bs, 'solid')
                    style.append('border-%s-style: %s'%(x, cbs))
                bw = elem.get('border-cell-%s-line-width'%x, None)
                if bw:
                    style.append('border-%s-width: %spt'%(x, bw))
                bc = elem.get('border-cell-%s-color'%x, None)
                if bc:
                    style.append('border-%s-color: %s'%(x, bc))
            style = ';\n'.join(style)
            cls = class_for_style.get(style)
            if cls is None:
                cls = 'border_style%d'%len(class_for_style)
                class_for_style[style] = cls
                style_map[cls] = style
            elem.set('class', cls)
        return style_map

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the RTF file behind ``stream``.

        Writes ``index.xhtml`` and ``metadata.opf`` to the current working
        directory and appends the inline CSS to ``styles.css``.

        :param stream: Open input file; its ``name`` is given to the RTF
            parser and the stream itself is re-read for metadata.
        :param options: Conversion options, stored on ``self.opts``.
        :param log: Logger, stored on ``self.log``.
        :return: Absolute path of the written ``metadata.opf``.
        :raises ValueError: If the parser raises RtfInvalidCodeException.
        '''
        from lxml import etree
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        from calibre.ebooks.rtf.input import InlineClass
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream.name)
        except RtfInvalidCodeException as e:
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.\n%s')%e)

        # Bound unconditionally: the <pict> loop below reads it even when
        # no picts file was produced, which used to raise NameError.
        imap = {}
        d = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if d:
            try:
                imap = self.extract_images(d[0])
            except Exception:
                # Image extraction is best effort; continue without them.
                self.log.exception('Failed to extract images...')

        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        border_styles = self.convert_borders(doc)
        # Point every numbered <pict> at the image file extracted for it.
        for pict in doc.xpath('//rtf:pict[@num]',
                namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
                pict.set('num', name)

        self.log('Converting XML to HTML...')
        inline_class = InlineClass(self.log)
        styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
        extensions = { ('calibre', 'inline-class') : inline_class }
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        html = 'index.xhtml'
        with open(html, 'wb') as f:
            res = transform.tostring(result)
            # Collapse runs of newlines into a single newline.
            res = re.sub('\n+', '\n', res)
            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        # Close the OPF file explicitly instead of leaking the handle.
        with open('metadata.opf', 'wb') as f:
            opf.render(f)
        return os.path.abspath('metadata.opf')
--- a/src/calibre/ebooks/conversion/plugins/rtf_output.py
+++ b/src/calibre/ebooks/conversion/plugins/rtf_output.py
@ -6,7 +6,6 @@ __docformat__ = 'restructuredtext en'
 import os
 from calibre.ebooks.rtf.rtfml import RTFMLizer
 from calibre.customize.conversion import OutputFormatPlugin
 class RTFOutput(OutputFormatPlugin):
@ -16,6 +15,8 @@ class RTFOutput(OutputFormatPlugin):
    file_type = 'rtf'
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from calibre.ebooks.rtf.rtfml import RTFMLizer
        rtfmlitzer = RTFMLizer(log)
        content = rtfmlitzer.extract_content(oeb_book, opts)
--- a/src/calibre/ebooks/conversion/plugins/snb_input.py
+++ b/src/calibre/ebooks/conversion/plugins/snb_input.py
@ -4,13 +4,11 @@ __license__ = 'GPL 3'
 __copyright__ = '2010, Li Fanxi <lifanxi@freemindworld.com>'
 __docformat__ = 'restructuredtext en'
-import os, uuid
+import os
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.snb.snbfile import SNBFile
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.filenames import ascii_filename
 from lxml import etree
 HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
@ -29,7 +27,12 @@ class SNBInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        import uuid
        from lxml import etree
        from calibre.ebooks.oeb.base import DirContainer
        from calibre.ebooks.snb.snbfile import SNBFile
        log.debug("Parsing SNB file...")
        snbFile = SNBFile()
        try:
--- a/src/calibre/ebooks/conversion/plugins/snb_output.py
+++ b/src/calibre/ebooks/conversion/plugins/snb_output.py
@ -6,12 +6,9 @@ __docformat__ = 'restructuredtext en'
 import os, string
 from lxml import etree
 from calibre.customize.conversion import OutputFormatPlugin, OptionRecommendation
 from calibre.ptempfile import TemporaryDirectory
 from calibre.constants import __appname__, __version__
 from calibre.ebooks.snb.snbfile import SNBFile
 from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName
 class SNBOutput(OutputFormatPlugin):
@ -49,6 +46,11 @@ class SNBOutput(OutputFormatPlugin):
     ])
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from lxml import etree
        from calibre.ebooks.snb.snbfile import SNBFile
        from calibre.ebooks.snb.snbml import SNBMLizer, ProcessFileName
        self.opts = opts
        from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
        try:
--- a/src/calibre/ebooks/conversion/plugins/tcr_input.py
+++ b/src/calibre/ebooks/conversion/plugins/tcr_input.py
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
 from cStringIO import StringIO
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.compression.tcr import decompress
 class TCRInput(InputFormatPlugin):
@ -17,6 +16,8 @@ class TCRInput(InputFormatPlugin):
    file_types  = set(['tcr'])
    def convert(self, stream, options, file_ext, log, accelerators):
        from calibre.ebooks.compression.tcr import decompress
        log.info('Decompressing text...')
        raw_txt = decompress(stream)
@ -28,7 +29,7 @@ class TCRInput(InputFormatPlugin):
        txt_plugin = plugin_for_input_format('txt')
        for opt in txt_plugin.options:
            if not hasattr(self.options, opt.option.name):
-                setattr(self.options, opt.option.name, opt.recommended_value)
+                setattr(options, opt.option.name, opt.recommended_value)
        stream.seek(0)
        return txt_plugin.convert(stream, options,
--- a/src/calibre/ebooks/conversion/plugins/tcr_output.py
+++ b/src/calibre/ebooks/conversion/plugins/tcr_output.py
@ -8,8 +8,6 @@ import os
 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
 from calibre.ebooks.txt.txtml import TXTMLizer
 from calibre.ebooks.compression.tcr import compress
 class TCROutput(OutputFormatPlugin):
@ -25,6 +23,9 @@ class TCROutput(OutputFormatPlugin):
    ])
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from calibre.ebooks.txt.txtml import TXTMLizer
        from calibre.ebooks.compression.tcr import compress
        close = False
        if not hasattr(output_path, 'write'):
            close = True
--- a/src/calibre/ebooks/conversion/plugins/txt_input.py
+++ b/src/calibre/ebooks/conversion/plugins/txt_input.py
@ -8,14 +8,6 @@ import os
 from calibre import _ent_pat, walk, xml_entity_to_unicode
 from calibre.customize.conversion import InputFormatPlugin, OptionRecommendation
 from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
 from calibre.ebooks.chardet import detect
 from calibre.ebooks.txt.processor import convert_basic, convert_markdown, \
    separate_paragraphs_single_line, separate_paragraphs_print_formatted, \
    preserve_spaces, detect_paragraph_type, detect_formatting_type, \
    normalize_line_endings, convert_textile, remove_indents, block_to_single_line, \
    separate_hard_scene_breaks
 from calibre.utils.zipfile import ZipFile
 class TXTInput(InputFormatPlugin):
@ -61,6 +53,17 @@ class TXTInput(InputFormatPlugin):
    def convert(self, stream, options, file_ext, log,
                accelerators):
        from calibre.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
        from calibre.ebooks.chardet import detect
        from calibre.utils.zipfile import ZipFile
        from calibre.ebooks.txt.processor import (convert_basic,
                convert_markdown, separate_paragraphs_single_line,
                separate_paragraphs_print_formatted, preserve_spaces,
                detect_paragraph_type, detect_formatting_type,
                normalize_line_endings, convert_textile, remove_indents,
                block_to_single_line, separate_hard_scene_breaks)
        self.log = log
        txt = ''
        log.debug('Reading text from file...')
--- a/src/calibre/ebooks/conversion/plugins/txt_output.py
+++ b/src/calibre/ebooks/conversion/plugins/txt_output.py
@ -7,15 +7,12 @@ __docformat__ = 'restructuredtext en'
 import os
 import shutil
 from lxml import etree
 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
 from calibre.ebooks.txt.txtml import TXTMLizer
 from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
 from calibre.ptempfile import TemporaryDirectory, TemporaryFile
-from calibre.utils.cleantext import clean_ascii_chars
+
-from calibre.utils.zipfile import ZipFile
+NEWLINE_TYPES = ['system', 'unix', 'old_mac', 'windows']
 class TXTOutput(OutputFormatPlugin):
@ -26,11 +23,11 @@ class TXTOutput(OutputFormatPlugin):
    options = set([
        OptionRecommendation(name='newline', recommended_value='system',
            level=OptionRecommendation.LOW,
-            short_switch='n', choices=TxtNewlines.NEWLINE_TYPES.keys(),
+            short_switch='n', choices=NEWLINE_TYPES,
            help=_('Type of newline to use. Options are %s. Default is \'system\'. '
                'Use \'old_mac\' for compatibility with Mac OS 9 and earlier. '
                'For Mac OS X use \'unix\'. \'system\' will default to the newline '
-                'type used by this OS.') % sorted(TxtNewlines.NEWLINE_TYPES.keys())),
+                'type used by this OS.') % sorted(NEWLINE_TYPES)),
        OptionRecommendation(name='txt_output_encoding', recommended_value='utf-8',
            level=OptionRecommendation.LOW,
            help=_('Specify the character encoding of the output document. ' \
@ -76,6 +73,11 @@ class TXTOutput(OutputFormatPlugin):
     ])
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from calibre.ebooks.txt.txtml import TXTMLizer
        from calibre.utils.cleantext import clean_ascii_chars
        from calibre.ebooks.txt.newlines import specified_newlines, TxtNewlines
        if opts.txt_output_formatting.lower() == 'markdown':
            from calibre.ebooks.txt.markdownml import MarkdownMLizer
            self.writer = MarkdownMLizer(log)
@ -116,6 +118,9 @@ class TXTZOutput(TXTOutput):
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        from calibre.ebooks.oeb.base import OEB_IMAGES
        from calibre.utils.zipfile import ZipFile
        from lxml import etree
        with TemporaryDirectory('_txtz_output') as tdir:
            # TXT
            txt_name = 'index.txt'
--- a/src/calibre/ebooks/epub/fix/epubcheck.py
+++ b/src/calibre/ebooks/epub/fix/epubcheck.py
@ -6,7 +6,6 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from calibre.ebooks.epub.fix import ePubFixer, InvalidEpub
 from calibre.utils.date import parse_date, strptime
 class Epubcheck(ePubFixer):
@ -35,6 +34,8 @@ class Epubcheck(ePubFixer):
        return 'epubcheck'
    def fix_pubdates(self):
        from calibre.utils.date import parse_date, strptime
        dirtied = False
        opf = self.container.opf
        for dcdate in opf.xpath('//dc:date',
--- a/src/calibre/ebooks/html/__init__.py
+++ b/src/calibre/ebooks/html/__init__.py
@ -8,12 +8,13 @@ __docformat__ = 'restructuredtext en'
 import re
 from lxml.etree import tostring as _tostring
 def tostring(root, strip_comments=False, pretty_print=False):
    '''
    Serialize processed XHTML.
    '''
    from lxml.etree import tostring as _tostring
    root.set('xmlns', 'http://www.w3.org/1999/xhtml')
    root.set('{http://www.w3.org/1999/xhtml}xlink', 'http://www.w3.org/1999/xlink')
    for x in root.iter():
--- a/src/calibre/ebooks/html/input.py
+++ b/src/calibre/ebooks/html/input.py
@ -11,19 +11,13 @@ __docformat__ = 'restructuredtext en'
 Input plugin for HTML or OPF ebooks.
 '''
-import os, re, sys, uuid, tempfile, errno as gerrno
+import os, re, sys,  errno as gerrno
 from urlparse import urlparse, urlunparse
-from urllib import unquote, quote
+from urllib import unquote
 from functools import partial
 from itertools import izip
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.ebooks.chardet import detect_xml_encoding
-from calibre.customize.conversion import OptionRecommendation
+from calibre.constants import iswindows
 from calibre.constants import islinux, isbsd, iswindows
 from calibre import unicode_path, as_unicode
 from calibre.utils.localization import get_lang
 from calibre.utils.filenames import ascii_filename
 class Link(object):
    '''
@ -241,262 +235,4 @@ def get_filelist(htmlfile, dir, opts, log):
    return filelist
 class HTMLInput(InputFormatPlugin):
    '''
    Input plugin that converts an HTML file, together with the local files
    it links to, or an OPF file into an OEB book.

    For HTML input every reachable, non-binary HTML file is added to the
    manifest and spine, links are rewritten to point at manifest items and
    a table of contents is generated from the page titles or headings.
    '''
    name        = 'HTML Input'
    author      = 'Kovid Goyal'
    description = 'Convert HTML and OPF files to an OEB'
    file_types  = set(['opf', 'html', 'htm', 'xhtml', 'xhtm', 'shtm', 'shtml'])
    options = set([
        OptionRecommendation(name='breadth_first',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Traverse links in HTML files breadth first. Normally, '
                    'they are traversed depth first.'
                   )
        ),
        OptionRecommendation(name='max_levels',
            recommended_value=5, level=OptionRecommendation.LOW,
            help=_('Maximum levels of recursion when following links in '
                   'HTML files. Must be non-negative. 0 implies that no '
                   'links in the root HTML file are followed. Default is '
                   '%default.'
                   )
        ),
        OptionRecommendation(name='dont_package',
            recommended_value=False, level=OptionRecommendation.LOW,
            help=_('Normally this input plugin re-arranges all the input '
                'files into a standard folder hierarchy. Only use this option '
                'if you know what you are doing as it can result in various '
                'nasty side effects in the rest of the conversion pipeline.'
                )
        ),
    ])

    def convert(self, stream, opts, file_ext, log,
                accelerators):
        '''
        Entry point of the plugin.

        For any extension other than ``opf`` the metadata is read from the
        HTML (and merged on top of metadata guessed from the file name when
        the stream has a name) and the book is built by
        :meth:`create_oebbook`. For ``opf`` the work is delegated to the
        plumber's ``create_oebbook``.

        Raises ValueError if ``opts.dont_package`` is set for HTML input.
        '''
        # Reset the cached answer of is_case_sensitive() for this conversion
        self._is_case_sensitive = None
        basedir = os.getcwd()
        self.opts = opts
        fname = None
        if hasattr(stream, 'name'):
            basedir = os.path.dirname(stream.name)
            fname = os.path.basename(stream.name)
        if file_ext != 'opf':
            if opts.dont_package:
                raise ValueError('The --dont-package option is not supported for an HTML input file')
            from calibre.ebooks.metadata.html import get_metadata
            mi = get_metadata(stream)
            if fname:
                from calibre.ebooks.metadata.meta import metadata_from_filename
                fmi = metadata_from_filename(fname)
                # Metadata found inside the HTML overrides the file name guess
                fmi.smart_update(mi)
                mi = fmi
            # NOTE(review): stream.name is used unconditionally here although
            # its presence is only tested above -- confirm callers always
            # pass a named stream.
            oeb = self.create_oebbook(stream.name, basedir, opts, log, mi)
            return oeb
        from calibre.ebooks.conversion.plumber import create_oebbook
        return create_oebbook(log, stream.name, opts,
                encoding=opts.input_encoding)

    def is_case_sensitive(self, path):
        '''
        Return whether the filesystem holding `path` is case sensitive.

        The answer is computed once by checking whether both the lower and
        upper cased forms of `path` exist, and is then cached on the
        instance. If `path` is empty or does not exist, ``islinux or isbsd``
        is returned without being cached.
        '''
        if getattr(self, '_is_case_sensitive', None) is not None:
            return self._is_case_sensitive
        if not path or not os.path.exists(path):
            return islinux or isbsd
        self._is_case_sensitive = not (os.path.exists(path.lower()) \
                and os.path.exists(path.upper()))
        return self._is_case_sensitive

    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        '''
        Build and return an OEB book from the HTML file at `htmlpath`.

        :param htmlpath: Path of the root HTML file
        :param basedir: Directory passed on to ``get_filelist``
        :param opts: Conversion options
        :param log: Logger
        :param mi: Metadata to store in the book
        '''
        from calibre.ebooks.conversion.plumber import create_oebbook
        from calibre.ebooks.oeb.base import (DirContainer,
            rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
            xpath)
        from calibre import guess_type
        from calibre.ebooks.oeb.transforms.metadata import \
            meta_info_to_oeb_metadata
        import cssutils, logging
        cssutils.log.setLevel(logging.WARN)
        self.OEB_STYLES = OEB_STYLES
        oeb = create_oebbook(log, None, opts, self,
                encoding=opts.input_encoding, populate=False)
        self.oeb = oeb

        # Metadata, with fallbacks for the fields that are missing
        metadata = oeb.metadata
        meta_info_to_oeb_metadata(mi, metadata, log)
        if not metadata.language:
            oeb.logger.warn(u'Language not specified')
            metadata.add('language', get_lang().replace('_', '-'))
        if not metadata.creator:
            oeb.logger.warn('Creator not specified')
            metadata.add('creator', self.oeb.translate(__('Unknown')))
        if not metadata.title:
            oeb.logger.warn('Title not specified')
            metadata.add('title', self.oeb.translate(__('Unknown')))
        bookid = str(uuid.uuid4())
        metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
        for ident in metadata.identifier:
            if 'id' in ident.attrib:
                # NOTE(review): this picks identifier[0] rather than the
                # `ident` that matched -- confirm this is intended.
                self.oeb.uid = metadata.identifier[0]
                break

        # Add every non-binary HTML file to the manifest and the spine
        filelist = get_filelist(htmlpath, basedir, opts, log)
        filelist = [f for f in filelist if not f.is_binary]
        htmlfile_map = {}
        for f in filelist:
            path = f.path
            oeb.container = DirContainer(os.path.dirname(path), log,
                    ignore_opf=True)
            bname = os.path.basename(path)
            id, href = oeb.manifest.generate(id='html',
                    href=ascii_filename(bname))
            htmlfile_map[path] = href
            item = oeb.manifest.add(id, href, 'text/html')
            item.html_input_href = bname
            oeb.spine.add(item, True)

        # Maps local paths (lower cased on case insensitive filesystems) to
        # the manifest href they were added under
        self.added_resources = {}
        self.log = log
        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
            if not self.is_case_sensitive(path):
                path = path.lower()
            self.added_resources[path] = href
        # Stash the lazily imported names needed by resource_adder()
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urldefrag
        self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME

        self.log('Rewriting HTML links')
        for f in filelist:
            path = f.path
            dpath = os.path.dirname(path)
            oeb.container = DirContainer(dpath, log, ignore_opf=True)
            item = oeb.manifest.hrefs[htmlfile_map[path]]
            rewrite_links(item.data, partial(self.resource_adder, base=dpath))

        # Rewrite url()s in stylesheets relative to the directory the
        # stylesheet was loaded from
        for item in oeb.manifest.values():
            if item.media_type in self.OEB_STYLES:
                dpath = None
                for path, href in self.added_resources.items():
                    if href == item.href:
                        dpath = os.path.dirname(path)
                        break
                cssutils.replaceUrls(item.data,
                        partial(self.resource_adder, base=dpath))

        # Generate a TOC from the <title> of every linear spine item, or from
        # the first heading found when the titles are not all distinct
        toc = self.oeb.toc
        self.oeb.auto_generated_toc = True
        titles = []
        headers = []
        for item in self.oeb.spine:
            if not item.linear: continue
            html = item.data
            title = ''.join(xpath(html, '/h:html/h:head/h:title/text()'))
            title = re.sub(r'\s+', ' ', title.strip())
            if title:
                titles.append(title)
            headers.append('(unlabled)')
            for tag in ('h1', 'h2', 'h3', 'h4', 'h5', 'strong'):
                expr = '/h:html/h:body//h:%s[position()=1]/text()'
                header = ''.join(xpath(html, expr % tag))
                header = re.sub(r'\s+', ' ', header.strip())
                if header:
                    headers[-1] = header
                    break
        use = titles
        if len(titles) > len(set(titles)):
            use = headers
        # NOTE(review): `use` only has entries for linear items (and titles
        # only for items with a non-empty title) but is paired with the whole
        # spine -- confirm the pairing cannot drift.
        for title, item in izip(use, self.oeb.spine):
            if not item.linear: continue
            toc.add(title, item.href)
        oeb.container = DirContainer(os.getcwdu(), oeb.log, ignore_opf=True)
        return oeb

    def link_to_local_path(self, link_, base=None):
        '''
        Resolve `link_` to a local filesystem path.

        Returns a ``(path, fragment)`` tuple, or ``(None, None)`` if the link
        cannot be decoded or parsed, is not a local resource or is empty.
        '''
        if not isinstance(link_, unicode):
            try:
                # 'strict' is the valid name for the raise-on-error handler;
                # undecodable links are still reported and ignored below
                link_ = link_.decode('utf-8', 'strict')
            except Exception:
                self.log.warn('Failed to decode link %r. Ignoring'%link_)
                return None, None
        try:
            l = Link(link_, base if base else os.getcwdu())
        except Exception:
            self.log.exception('Failed to process link: %r'%link_)
            return None, None
        if l.path is None:
            # Not a local resource
            return None, None
        link = l.path.replace('/', os.sep).strip()
        frag = l.fragment
        if not link:
            return None, None
        return link, frag

    def resource_adder(self, link_, base=None):
        '''
        Link rewriting callback: add the local file `link_` points to to the
        manifest and return the href it can be reached by.

        `link_` is returned unchanged when it is not a readable local file or
        points to a directory. None is returned for links to plain text
        files. Otherwise the manifest href is returned, with the fragment of
        the original link (if any) appended.
        '''
        link, frag = self.link_to_local_path(link_, base=base)
        if link is None:
            return link_
        try:
            if base and not os.path.isabs(link):
                link = os.path.join(base, link)
            link = os.path.abspath(link)
        except Exception:
            return link_
        if not os.access(link, os.R_OK):
            return link_
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
        if not self.is_case_sensitive(tempfile.gettempdir()):
            link = link.lower()
        if link not in self.added_resources:
            bhref = os.path.basename(link)
            id, href = self.oeb.manifest.generate(id='added',
                    href=bhref)
            guessed = self.guess_type(href)[0]
            media_type = guessed or self.BINARY_MIME
            if media_type == 'text/plain':
                self.log.warn('Ignoring link to text file %r'%link_)
                return None
            self.oeb.log.debug('Added', link)
            self.oeb.container = self.DirContainer(os.path.dirname(link),
                    self.oeb.log, ignore_opf=True)
            # Load into memory
            item = self.oeb.manifest.add(id, href, media_type)
            # bhref refers to an already existing file. The read() method of
            # DirContainer will call unquote on it before trying to read the
            # file, therefore we quote it here.
            if isinstance(bhref, unicode):
                bhref = bhref.encode('utf-8')
            item.html_input_href = quote(bhref).decode('utf-8')
            if guessed in self.OEB_STYLES:
                item.override_css_fetch = partial(
                        self.css_import_handler, os.path.dirname(link))
            # Accessing .data forces the item to be loaded now, while the
            # container still points at the right directory
            item.data
            self.added_resources[link] = href
        nlink = self.added_resources[link]
        if frag:
            nlink = '#'.join((nlink, frag))
        return nlink

    def css_import_handler(self, base, href):
        '''
        Fetcher for stylesheets referenced from CSS loaded out of `base`.

        Returns ``(None, css_text)`` on success and ``(None, None)`` if
        `href` is not a readable local file or reading it fails.
        '''
        link, frag = self.link_to_local_path(href, base=base)
        if link is None or not os.access(link, os.R_OK) or os.path.isdir(link):
            return (None, None)
        try:
            # Close the file deterministically instead of relying on the
            # garbage collector
            with open(link, 'rb') as f:
                raw = f.read().decode('utf-8', 'replace')
            raw = self.oeb.css_preprocessor(raw, add_namespace=True)
        except Exception:
            self.log.exception('Failed to read CSS file: %r'%link)
            return (None, None)
        return (None, raw)
--- a/src/calibre/ebooks/lrf/__init__.py
+++ b/src/calibre/ebooks/lrf/__init__.py
@ -4,7 +4,6 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 This package contains logic to read and write LRF files.
 The LRF file format is documented at U{http://www.sven.de/librie/Librie/LrfFormat}.
 """
 from uuid import uuid4
 from calibre.ebooks.lrf.pylrs.pylrs import Book as _Book
 from calibre.ebooks.lrf.pylrs.pylrs import TextBlock, Header, \
@ -60,6 +59,7 @@ def find_custom_fonts(options, logger):
 def Book(options, logger, font_delta=0, header=None,
         profile=PRS500_PROFILE, **settings):
    from uuid import uuid4
    ps = {}
    ps['topmargin']      = options.top_margin
    ps['evensidemargin'] = options.left_margin
--- a/src/calibre/ebooks/lrf/input.py
+++ b/src/calibre/ebooks/lrf/input.py
@ -6,12 +6,11 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import os, textwrap, sys, operator
+import textwrap, operator
 from copy import deepcopy, copy
 from lxml import etree
 from calibre.customize.conversion import InputFormatPlugin
 from calibre import guess_type
 class Canvas(etree.XSLTExtension):
@ -406,76 +405,4 @@ class Styles(etree.XSLTExtension):
 class LRFInput(InputFormatPlugin):
    '''
    Input plugin that converts an LRF file to HTML by dumping it to XML and
    running that XML through the ``templates/lrf.xsl`` stylesheet.
    '''
    name        = 'LRF Input'
    author      = 'Kovid Goyal'
    description = 'Convert LRF files to HTML'
    file_types  = set(['lrf'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the LRF document in `stream`.

        Writes ``content.opf`` (and whatever ``styles.write()`` and
        ``to_xml(write_files=True)`` produce) into the current directory and
        returns the absolute path of ``content.opf``.
        '''
        self.log = log
        self.log('Generating XML')
        from calibre.ebooks.lrf.lrfparser import LRFDocument
        d = LRFDocument(stream)
        d.parse()
        xml = d.to_xml(write_files=True)
        if options.verbose > 2:
            # Debugging aid: keep a copy of the intermediate XML
            with open('lrs.xml', 'wb') as f:
                f.write(xml.encode('utf-8'))
        parser = etree.XMLParser(no_network=True, huge_tree=True)
        try:
            doc = etree.fromstring(xml, parser=parser)
        except Exception:
            # Retry with a parser that tolerates malformed XML
            self.log.warn('Failed to parse XML. Trying to recover')
            parser = etree.XMLParser(no_network=True, huge_tree=True,
                    recover=True)
            doc = etree.fromstring(xml, parser=parser)

        # Map the refobj of every CharButton to the link target of the
        # JumpTo found under the object it references
        char_button_map = {}
        for x in doc.xpath('//CharButton[@refobj]'):
            ro = x.get('refobj')
            jump_button = doc.xpath('//*[@objid="%s"]'%ro)
            if jump_button:
                jump_to = jump_button[0].xpath('descendant::JumpTo[@refpage and @refobj]')
                if jump_to:
                    char_button_map[ro] = '%s.xhtml#%s'%(jump_to[0].get('refpage'),
                            jump_to[0].get('refobj'))

        # Map the refobj of every Plot to the file of the ImageStream used by
        # the Image it references
        plot_map = {}
        for x in doc.xpath('//Plot[@refobj]'):
            ro = x.get('refobj')
            image = doc.xpath('//Image[@objid="%s" and @refstream]'%ro)
            if image:
                imgstr = doc.xpath('//ImageStream[@objid="%s" and @file]'%
                    image[0].get('refstream'))
                if imgstr:
                    plot_map[ro] = imgstr[0].get('file')

        self.log('Converting XML to HTML...')
        styledoc = etree.fromstring(P('templates/lrf.xsl', data=True))
        media_type = MediaType()
        styles = Styles()
        text_block = TextBlock(styles, char_button_map, plot_map, log)
        canvas = Canvas(doc, styles, text_block, log)
        image_block = ImageBlock(canvas)
        ruled_line = RuledLine()
        extensions = {
                ('calibre', 'media-type') : media_type,
                ('calibre', 'text-block') : text_block,
                ('calibre', 'ruled-line') : ruled_line,
                ('calibre', 'styles')     : styles,
                ('calibre', 'canvas')     : canvas,
                ('calibre', 'image-block'): image_block,
                }
        transform = etree.XSLT(styledoc, extensions=extensions)
        try:
            result = transform(doc)
        except RuntimeError:
            # Retry once with a higher recursion limit
            sys.setrecursionlimit(5000)
            result = transform(doc)
        with open('content.opf', 'wb') as f:
            f.write(result)
        styles.write()
        return os.path.abspath('content.opf')
--- a/src/calibre/ebooks/metadata/book/json_codec.py
+++ b/src/calibre/ebooks/metadata/book/json_codec.py
@ -12,7 +12,6 @@ from calibre.ebooks.metadata.book import SERIALIZABLE_FIELDS
 from calibre.constants import filesystem_encoding, preferred_encoding
 from calibre.library.field_metadata import FieldMetadata
 from calibre.utils.date import parse_date, isoformat, UNDEFINED_DATE, local_tz
 from calibre.utils.magick import Image
 from calibre import isbytestring
 # Translate datetimes to and from strings. The string form is the datetime in
@ -37,6 +36,8 @@ def encode_thumbnail(thumbnail):
    '''
    Encode the image part of a thumbnail, then return the 3 part tuple
    '''
    from calibre.utils.magick import Image
    if thumbnail is None:
        return None
    if not isinstance(thumbnail, (tuple, list)):
--- a/src/calibre/ebooks/metadata/epub.py
+++ b/src/calibre/ebooks/metadata/epub.py
@ -129,9 +129,57 @@ class OCFDirReader(OCFReader):
    def open(self, path, *args, **kwargs):
        return open(os.path.join(self.root, path), *args, **kwargs)
-def get_cover(opf, opf_path, stream, reader=None):
+def render_cover(opf, opf_path, zf, reader=None):
    from calibre.ebooks import render_html_svg_workaround
    from calibre.utils.logging import default_log
    cpage = opf.first_spine_item()
    if not cpage:
        return
    if reader is not None and reader.encryption_meta.is_encrypted(cpage):
        return
    with TemporaryDirectory('_epub_meta') as tdir:
        with CurrentDir(tdir):
            zf.extractall()
            opf_path = opf_path.replace('/', os.sep)
            cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
            if not os.path.exists(cpage):
                return
            if isosx:
                # On OS X trying to render a HTML cover which uses embedded
                # fonts more than once in the same process causes a crash in Qt
                # so be safe and remove the fonts as well as any @font-face
                # rules
                for f in walk('.'):
                    if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'):
                        os.remove(f)
                ffpat = re.compile(br'@font-face.*?{.*?}',
                        re.DOTALL|re.IGNORECASE)
                with open(cpage, 'r+b') as f:
                    raw = f.read()
                    f.truncate(0)
                    raw = ffpat.sub(b'', raw)
                    f.write(raw)
                from calibre.ebooks.chardet import xml_to_unicode
                raw = xml_to_unicode(raw,
                        strip_encoding_pats=True, resolve_entities=True)[0]
                from lxml import html
                for link in html.fromstring(raw).xpath('//link'):
                    href = link.get('href', '')
                    if href:
                        path = os.path.join(os.path.dirname(cpage), href)
                        if os.path.exists(path):
                            with open(path, 'r+b') as f:
                                raw = f.read()
                                f.truncate(0)
                                raw = ffpat.sub(b'', raw)
                                f.write(raw)
            return render_html_svg_workaround(cpage, default_log)
 def get_cover(opf, opf_path, stream, reader=None):
    raster_cover = opf.raster_cover
    stream.seek(0)
    zf = ZipFile(stream)
@ -152,27 +200,7 @@ def get_cover(opf, opf_path, stream, reader=None):
            zf.close()
            return data
-    cpage = opf.first_spine_item()
+    return render_cover(opf, opf_path, zf, reader=reader)
    if not cpage:
        return
    if reader is not None and reader.encryption_meta.is_encrypted(cpage):
        return
    with TemporaryDirectory('_epub_meta') as tdir:
        with CurrentDir(tdir):
            zf.extractall()
            if isosx:
                # On OS X trying to render an HTML cover which uses embedded
                # fonts more than once in the same process causes a crash in Qt
                # so be safe and remove the fonts.
                for f in walk('.'):
                    if os.path.splitext(f)[1].lower() in ('.ttf', '.otf'):
                        os.remove(f)
            opf_path = opf_path.replace('/', os.sep)
            cpage = os.path.join(tdir, os.path.dirname(opf_path), cpage)
            if not os.path.exists(cpage):
                return
            return render_html_svg_workaround(cpage, default_log)
 def get_metadata(stream, extract_cover=True):
    """ Return metadata as a :class:`Metadata` object """
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@ -222,6 +222,11 @@ def forked_read_metadata(path, tdir):
    from calibre.ebooks.metadata.opf2 import metadata_to_opf
    with open(path, 'rb') as f:
        fmt = os.path.splitext(path)[1][1:].lower()
        f.seek(0, 2)
        sz = f.tell()
        with open(os.path.join(tdir, 'size.txt'), 'wb') as s:
            s.write(str(sz).encode('ascii'))
        f.seek(0)
        mi = get_metadata(f, fmt)
    if mi.cover_data and mi.cover_data[1]:
        with open(os.path.join(tdir, 'cover.jpg'), 'wb') as f:
--- a/src/calibre/ebooks/metadata/opf2.py
+++ b/src/calibre/ebooks/metadata/opf2.py
@ -1019,6 +1019,11 @@ class OPF(object): # {{{
                    mt = item.get('media-type', '')
                    if 'xml' not in mt:
                        return item.get('href', None)
            for item in self.itermanifest():
                if item.get('href', None) == cover_id:
                    mt = item.get('media-type', '')
                    if mt.startswith('image/'):
                        return item.get('href', None)
    @dynamic_property
    def cover(self):
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@ -12,19 +12,14 @@ from urllib import urlencode
 from threading import Thread
 from Queue import Queue, Empty
 from lxml.html import tostring
 from calibre import as_unicode
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.sources.base import (Source, Option, fixcase,
        fixauthors)
 from calibre.utils.cleantext import clean_ascii_chars
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.library.comments import sanitize_comments_html
 from calibre.utils.date import parse_date
 from calibre.utils.localization import canonicalize_lang
 from calibre.utils.soupparser import fromstring
 class Worker(Thread): # Get details {{{
@ -43,6 +38,8 @@ class Worker(Thread): # Get details {{{
        self.browser = browser.clone_browser()
        self.cover_url = self.amazon_id = self.isbn = None
        self.domain = domain
        from lxml.html import tostring
        self.tostring = tostring
        months = {
                'de': {
@ -176,6 +173,10 @@ class Worker(Thread): # Get details {{{
            self.log.exception('get_details failed for url: %r'%self.url)
    def get_details(self):
        from calibre.utils.cleantext import clean_ascii_chars
        from calibre.utils.soupparser import fromstring
        from calibre.ebooks.chardet import xml_to_unicode
        try:
            raw = self.browser.open_novisit(self.url, timeout=self.timeout).read().strip()
        except Exception as e:
@ -210,7 +211,7 @@ class Worker(Thread): # Get details {{{
        errmsg = root.xpath('//*[@id="errorMessage"]')
        if errmsg:
            msg = 'Failed to parse amazon details page: %r'%self.url
-            msg += tostring(errmsg, method='text', encoding=unicode).strip()
+            msg += self.tostring(errmsg, method='text', encoding=unicode).strip()
            self.log.error(msg)
            return
@ -322,10 +323,10 @@ class Worker(Thread): # Get details {{{
        tdiv = root.xpath('//h1[contains(@class, "parseasinTitle")]')[0]
        actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
        if actual_title:
-            title = tostring(actual_title[0], encoding=unicode,
+            title = self.tostring(actual_title[0], encoding=unicode,
                    method='text').strip()
        else:
-            title = tostring(tdiv, encoding=unicode, method='text').strip()
+            title = self.tostring(tdiv, encoding=unicode, method='text').strip()
        return re.sub(r'[(\[].*[)\]]', '', title).strip()
    def parse_authors(self, root):
@ -337,7 +338,7 @@ class Worker(Thread): # Get details {{{
                    ''')
        for x in aname:
            x.tail = ''
-        authors = [tostring(x, encoding=unicode, method='text').strip() for x
+        authors = [self.tostring(x, encoding=unicode, method='text').strip() for x
                in aname]
        authors = [a for a in authors if a]
        return authors
@ -356,6 +357,8 @@ class Worker(Thread): # Get details {{{
                    return float(m.group(1))/float(m.group(3)) * 5
    def parse_comments(self, root):
        from calibre.library.comments import sanitize_comments_html
        desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
        if desc:
            desc = desc[0]
@ -365,7 +368,7 @@ class Worker(Thread): # Get details {{{
            for a in desc.xpath('descendant::a[@href]'):
                del a.attrib['href']
                a.tag = 'span'
-            desc = tostring(desc, method='html', encoding=unicode).strip()
+            desc = self.tostring(desc, method='html', encoding=unicode).strip()
            # Encoding bug in Amazon data U+fffd (replacement char)
            # in some examples it is present in place of '
@ -602,6 +605,11 @@ class Amazon(Source):
        Note this method will retry without identifiers automatically if no
        match is found with identifiers.
        '''
        from lxml.html import tostring
        from calibre.utils.cleantext import clean_ascii_chars
        from calibre.utils.soupparser import fromstring
        from calibre.ebooks.chardet import xml_to_unicode
        query, domain = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
        if query is None:
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@ -12,7 +12,6 @@ from future_builtins import map
 from calibre import browser, random_user_agent
 from calibre.customize import Plugin
 from calibre.utils.logging import ThreadSafeLog, FileStream
 from calibre.utils.config import JSONConfig
 from calibre.utils.titlecase import titlecase
 from calibre.utils.icu import capitalize, lower, upper
@ -34,6 +33,7 @@ msprefs.defaults['fewer_tags'] = True
 msprefs.defaults['cover_priorities'] = {'Google':2}
 def create_log(ostream=None):
    '''
    Build a ThreadSafeLog at DEBUG level whose only output is a FileStream
    wrapping ``ostream``.

    The logging classes are imported inside the function rather than at
    module import time.
    '''
    from calibre.utils.logging import ThreadSafeLog, FileStream
    logger = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
    sink = FileStream(ostream)
    logger.outputs = [sink]
    return logger
--- a/src/calibre/ebooks/metadata/sources/douban.py
+++ b/src/calibre/ebooks/metadata/sources/douban.py
@ -12,14 +12,10 @@ from urllib import urlencode
 from functools import partial
 from Queue import Queue, Empty
 from lxml import etree
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.sources.base import Source
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.date import parse_date, utcnow
 from calibre.utils.cleantext import clean_ascii_chars
 from calibre import as_unicode
 NAMESPACES = {
@ -28,22 +24,6 @@ NAMESPACES = {
              'db': 'http://www.douban.com/xmlns/',
              'gd': 'http://schemas.google.com/g/2005'
            }
 XPath = partial(etree.XPath, namespaces=NAMESPACES)
 total_results  = XPath('//openSearch:totalResults')
 start_index    = XPath('//openSearch:startIndex')
 items_per_page = XPath('//openSearch:itemsPerPage')
 entry          = XPath('//atom:entry')
 entry_id       = XPath('descendant::atom:id')
 title          = XPath('descendant::atom:title')
 description    = XPath('descendant::atom:summary')
 publisher      = XPath("descendant::db:attribute[@name='publisher']")
 isbn           = XPath("descendant::db:attribute[@name='isbn13']")
 date           = XPath("descendant::db:attribute[@name='pubdate']")
 creator        = XPath("descendant::db:attribute[@name='author']")
 booktag        = XPath("descendant::db:tag/attribute::name")
 rating         = XPath("descendant::gd:rating/attribute::average")
 cover_url      = XPath("descendant::atom:link[@rel='image']/attribute::href")
 def get_details(browser, url, timeout): # {{{
    try:
        if Douban.DOUBAN_API_KEY and Douban.DOUBAN_API_KEY != '':
@ -61,6 +41,25 @@ def get_details(browser, url, timeout): # {{{
 # }}}
 def to_metadata(browser, log, entry_, timeout): # {{{
    from lxml import etree
    from calibre.ebooks.chardet import xml_to_unicode
    from calibre.utils.date import parse_date, utcnow
    from calibre.utils.cleantext import clean_ascii_chars
    XPath = partial(etree.XPath, namespaces=NAMESPACES)
    entry          = XPath('//atom:entry')
    entry_id       = XPath('descendant::atom:id')
    title          = XPath('descendant::atom:title')
    description    = XPath('descendant::atom:summary')
    publisher      = XPath("descendant::db:attribute[@name='publisher']")
    isbn           = XPath("descendant::db:attribute[@name='isbn13']")
    date           = XPath("descendant::db:attribute[@name='pubdate']")
    creator        = XPath("descendant::db:attribute[@name='author']")
    booktag        = XPath("descendant::db:tag/attribute::name")
    rating         = XPath("descendant::gd:rating/attribute::average")
    cover_url      = XPath("descendant::atom:link[@rel='image']/attribute::href")
    def get_text(extra, x):
        try:
            ans = x(extra)
@ -275,6 +274,7 @@ class Douban(Source):
    def get_all_details(self, br, log, entries, abort, # {{{
            result_queue, timeout):
        from lxml import etree
        for relevance, i in enumerate(entries):
            try:
                ans = to_metadata(br, log, i, timeout)
@ -298,6 +298,13 @@ class Douban(Source):
    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
            identifiers={}, timeout=30):
        from lxml import etree
        from calibre.ebooks.chardet import xml_to_unicode
        from calibre.utils.cleantext import clean_ascii_chars
        XPath = partial(etree.XPath, namespaces=NAMESPACES)
        entry          = XPath('//atom:entry')
        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
        if not query:
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@ -12,8 +12,6 @@ from urllib import urlencode
 from functools import partial
 from Queue import Queue, Empty
 from lxml import etree
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.sources.base import Source
 from calibre.ebooks.metadata.book.base import Metadata
@ -29,23 +27,6 @@ NAMESPACES = {
              'dc'   : 'http://purl.org/dc/terms',
              'gd'   : 'http://schemas.google.com/g/2005'
            }
 XPath = partial(etree.XPath, namespaces=NAMESPACES)
 total_results  = XPath('//openSearch:totalResults')
 start_index    = XPath('//openSearch:startIndex')
 items_per_page = XPath('//openSearch:itemsPerPage')
 entry          = XPath('//atom:entry')
 entry_id       = XPath('descendant::atom:id')
 creator        = XPath('descendant::dc:creator')
 identifier     = XPath('descendant::dc:identifier')
 title          = XPath('descendant::dc:title')
 date           = XPath('descendant::dc:date')
 publisher      = XPath('descendant::dc:publisher')
 subject        = XPath('descendant::dc:subject')
 description    = XPath('descendant::dc:description')
 language       = XPath('descendant::dc:language')
 rating         = XPath('descendant::gd:rating[@average]')
 def get_details(browser, url, timeout): # {{{
    try:
        raw = browser.open_novisit(url, timeout=timeout).read()
@ -61,6 +42,24 @@ def get_details(browser, url, timeout): # {{{
 # }}}
 def to_metadata(browser, log, entry_, timeout): # {{{
    from lxml import etree
    XPath = partial(etree.XPath, namespaces=NAMESPACES)
    # total_results  = XPath('//openSearch:totalResults')
    # start_index    = XPath('//openSearch:startIndex')
    # items_per_page = XPath('//openSearch:itemsPerPage')
    entry          = XPath('//atom:entry')
    entry_id       = XPath('descendant::atom:id')
    creator        = XPath('descendant::dc:creator')
    identifier     = XPath('descendant::dc:identifier')
    title          = XPath('descendant::dc:title')
    date           = XPath('descendant::dc:date')
    publisher      = XPath('descendant::dc:publisher')
    subject        = XPath('descendant::dc:subject')
    description    = XPath('descendant::dc:description')
    language       = XPath('descendant::dc:language')
    rating         = XPath('descendant::gd:rating[@average]')
    def get_text(extra, x):
        try:
@ -266,6 +265,7 @@ class GoogleBooks(Source):
    def get_all_details(self, br, log, entries, abort, # {{{
            result_queue, timeout):
        from lxml import etree
        for relevance, i in enumerate(entries):
            try:
                ans = to_metadata(br, log, i, timeout)
@ -289,6 +289,10 @@ class GoogleBooks(Source):
    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
            identifiers={}, timeout=30):
        from lxml import etree
        XPath = partial(etree.XPath, namespaces=NAMESPACES)
        entry          = XPath('//atom:entry')
        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
        if not query:
--- a/src/calibre/ebooks/metadata/sources/isbndb.py
+++ b/src/calibre/ebooks/metadata/sources/isbndb.py
@ -9,12 +9,9 @@ __docformat__ = 'restructuredtext en'
 from urllib import quote
 from lxml import etree
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.sources.base import Source, Option
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.cleantext import clean_ascii_chars
 from calibre.utils.icu import lower
 from calibre.ebooks.metadata.book.base import Metadata
@ -122,6 +119,7 @@ class ISBNDB(Source):
            result_queue.put(result)
    def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers):
        from lxml import etree
        def tostring(x):
            if x is None:
@ -198,6 +196,10 @@ class ISBNDB(Source):
    def make_query(self, q, abort, title=None, authors=None, identifiers={},
            max_pages=10, timeout=30):
        from lxml import etree
        from calibre.ebooks.chardet import xml_to_unicode
        from calibre.utils.cleantext import clean_ascii_chars
        page_num = 1
        parser = etree.XMLParser(recover=True, no_network=True)
        br = self.browser
--- a/src/calibre/ebooks/metadata/sources/overdrive.py
+++ b/src/calibre/ebooks/metadata/sources/overdrive.py
@ -9,18 +9,14 @@ __docformat__ = 'restructuredtext en'
 '''
 Fetch metadata using Overdrive Content Reserve
 '''
-import re, random, mechanize, copy, json
+import re, random, copy, json
 from threading import RLock
 from Queue import Queue, Empty
 from lxml import html
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.sources.base import Source, Option
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.library.comments import sanitize_comments_html
 from calibre.utils.soupparser import fromstring
 ovrdrv_data_cache = {}
 cache_lock = RLock()
@ -80,6 +76,7 @@ class OverDrive(Source):
    def download_cover(self, log, result_queue, abort, # {{{
            title=None, authors=None, identifiers={}, timeout=30):
        import mechanize
        cached_url = self.get_cached_cover_url(identifiers)
        if cached_url is None:
            log.info('No cached cover found, running identify')
@ -170,6 +167,7 @@ class OverDrive(Source):
        this page attempts to set a cookie that Mechanize doesn't like
        copy the cookiejar to a separate instance and make a one-off request with the temp cookiejar
        '''
        import mechanize
        goodcookies = br._ua_handlers['_cookies'].cookiejar
        clean_cj = mechanize.CookieJar()
        cookies_to_copy = []
@ -187,6 +185,7 @@ class OverDrive(Source):
        br.set_cookiejar(clean_cj)
    def overdrive_search(self, br, log, q, title, author):
        import mechanize
        # re-initialize the cookiejar so that it's clean
        clean_cj = mechanize.CookieJar()
        br.set_cookiejar(clean_cj)
@ -303,6 +302,7 @@ class OverDrive(Source):
            return ''
    def overdrive_get_record(self, br, log, q, ovrdrv_id):
        import mechanize
        search_url = q+'SearchResults.aspx?ReserveID={'+ovrdrv_id+'}'
        results_url = q+'SearchResults.svc/GetResults?sEcho=1&iColumns=18&sColumns=ReserveID%2CTitle%2CSubtitle%2CEdition%2CSeries%2CPublisher%2CFormat%2CFormatID%2CCreators%2CThumbImage%2CShortDescription%2CWorldCatLink%2CExcerptLink%2CCreatorFile%2CSortTitle%2CAvailableToLibrary%2CAvailableToRetailer%2CRelevancyRank&iDisplayStart=0&iDisplayLength=10&sSearch=&bEscapeRegex=true&iSortingCols=1&iSortCol_0=17&sSortDir_0=asc'
@ -393,6 +393,11 @@ class OverDrive(Source):
    def get_book_detail(self, br, metadata_url, mi, ovrdrv_id, log):
        from lxml import html
        from calibre.ebooks.chardet import xml_to_unicode
        from calibre.utils.soupparser import fromstring
        from calibre.library.comments import sanitize_comments_html
        try:
            raw = br.open_novisit(metadata_url).read()
        except Exception, e:
--- a/src/calibre/ebooks/metadata/sources/ozon.py
+++ b/src/calibre/ebooks/metadata/sources/ozon.py
@ -6,15 +6,11 @@ __copyright__ = '2011, Roman Mukhin <ramses_ru at hotmail.com>'
 __docformat__ = 'restructuredtext en'
 import re
 import urllib2
 import datetime
 from urllib import quote_plus
 from Queue import Queue, Empty
-from lxml import etree, html
+
 from calibre import as_unicode
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.sources.base import Source
 from calibre.ebooks.metadata.book.base import Metadata
@ -43,6 +39,7 @@ class Ozon(Source):
    isbnRegex = re.compile(isbnPattern)
    def get_book_url(self, identifiers): # {{{
        import urllib2
        ozon_id = identifiers.get('ozon', None)
        res = None
        if ozon_id:
@ -81,6 +78,9 @@ class Ozon(Source):
    def identify(self, log, result_queue, abort, title=None, authors=None,
            identifiers={}, timeout=30): # {{{
        from lxml import etree
        from calibre.ebooks.chardet import xml_to_unicode
        if not self.is_configured():
            return
        query = self.create_query(log, title=title, authors=authors, identifiers=identifiers)
@ -283,6 +283,9 @@ class Ozon(Source):
    # }}}
    def get_book_details(self, log, metadata, timeout): # {{{
        from lxml import html, etree
        from calibre.ebooks.chardet import xml_to_unicode
        url = self.get_book_url(metadata.get_identifiers())[2]
        raw = self.browser.open_novisit(url, timeout=timeout).read()
--- a/src/calibre/ebooks/odt/input.py
+++ b/src/calibre/ebooks/odt/input.py
@ -12,7 +12,6 @@ from lxml import etree
 from odf.odf2xhtml import ODF2XHTML
 from calibre import CurrentDir, walk
 from calibre.customize.conversion import InputFormatPlugin
 class Extract(ODF2XHTML):
@ -178,16 +177,4 @@ class Extract(ODF2XHTML):
            return os.path.abspath('metadata.opf')
 class ODTInput(InputFormatPlugin):
    # Input plugin metadata: handles files with the 'odt' extension.
    name        = 'ODT Input'
    author      = 'Kovid Goyal'
    description = 'Convert ODT (OpenOffice) files to HTML'
    file_types  = {'odt'}

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Delegate the conversion to a fresh Extract instance, called with the
        input stream, the current directory ('.') as output location and the
        log. Returns whatever Extract returns.
        '''
        extractor = Extract()
        return extractor(stream, '.', log)
--- a/src/calibre/ebooks/pdb/init.py
+++ b/src/calibre/ebooks/pdb/init.py
@ -7,31 +7,38 @@ __docformat__ = 'restructuredtext en'
 class PDBError(Exception):
    '''Exception type defined by the pdb package.'''
 FORMAT_READERS = None
-from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
+def _import_readers():
-from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
+    global FORMAT_READERS
-from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
+    from calibre.ebooks.pdb.ereader.reader import Reader as ereader_reader
-from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
+    from calibre.ebooks.pdb.palmdoc.reader import Reader as palmdoc_reader
-from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader
+    from calibre.ebooks.pdb.ztxt.reader import Reader as ztxt_reader
    from calibre.ebooks.pdb.pdf.reader import Reader as pdf_reader
    from calibre.ebooks.pdb.plucker.reader import Reader as plucker_reader
-FORMAT_READERS = {
+    FORMAT_READERS = {
-    'PNPdPPrs': ereader_reader,
+        'PNPdPPrs': ereader_reader,
-    'PNRdPPrs': ereader_reader,
+        'PNRdPPrs': ereader_reader,
-    'zTXTGPlm': ztxt_reader,
+        'zTXTGPlm': ztxt_reader,
-    'TEXtREAd': palmdoc_reader,
+        'TEXtREAd': palmdoc_reader,
-    '.pdfADBE': pdf_reader,
+        '.pdfADBE': pdf_reader,
-    'DataPlkr': plucker_reader,
+        'DataPlkr': plucker_reader,
-}
+    }
-from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
+ALL_FORMAT_WRITERS = {'doc', 'ztxt', 'ereader'}
-from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer
+FORMAT_WRITERS = None
-from calibre.ebooks.pdb.ereader.writer import Writer as ereader_writer
+def _import_writers():
    global FORMAT_WRITERS
    from calibre.ebooks.pdb.palmdoc.writer import Writer as palmdoc_writer
    from calibre.ebooks.pdb.ztxt.writer import Writer as ztxt_writer
    from calibre.ebooks.pdb.ereader.writer import Writer as ereader_writer
-FORMAT_WRITERS = {
+    FORMAT_WRITERS = {
-    'doc': palmdoc_writer,
+        'doc': palmdoc_writer,
-    'ztxt': ztxt_writer,
+        'ztxt': ztxt_writer,
-    'ereader': ereader_writer,
+        'ereader': ereader_writer,
-}
+    }
 IDENTITY_TO_NAME = {
    'PNPdPPrs': 'eReader',
@ -69,11 +76,17 @@ def get_reader(identity):
    '''
    Returns None if no reader is found for the identity.
    '''
    global FORMAT_READERS
    if FORMAT_READERS is None:
        _import_readers()
    return FORMAT_READERS.get(identity, None)
 def get_writer(extension):
    '''
    Look up the writer registered for ``extension``.

    The FORMAT_WRITERS table is populated on first use by calling
    _import_writers(). Returns None if no writer is found for extension.
    '''
    if FORMAT_WRITERS is None:
        # First call: build the extension -> writer table.
        _import_writers()
    writers = FORMAT_WRITERS
    return writers.get(extension)
--- a/src/calibre/ebooks/rb/reader.py
+++ b/src/calibre/ebooks/rb/reader.py
@ -89,7 +89,7 @@ class Reader(object):
            output += self.stream.read(toc_item.size).decode('cp1252' if self.encoding is None else self.encoding, 'replace')
        with open(os.path.join(output_dir, toc_item.name), 'wb') as html:
-            html.write(output.encode('utf-8'))
+            html.write(output.replace('<TITLE>', '<TITLE> ').encode('utf-8'))
    def get_image(self, toc_item, output_dir):
        if toc_item.flags != 0:
--- a/src/calibre/ebooks/rtf/input.py
+++ b/src/calibre/ebooks/rtf/input.py
@ -2,42 +2,9 @@ from __future__ import with_statement
 __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import os, glob, re, textwrap
 from lxml import etree
 from calibre.customize.conversion import InputFormatPlugin
 # Lookup table from border style names found on table cells
 # (the 'border-cell-<side>-style' attributes read in convert_borders) to the
 # CSS border-style keyword emitted for them. Names missing from this table
 # fall back to 'solid' at the lookup site.
 # NOTE(review): both 'tripple' and 'triple' are present; presumably the
 # misspelled key matches what the RTF parser emits -- confirm before removing.
 border_style_map = {
        'single' : 'solid',
        'double-thickness-border' : 'double',
        'shadowed-border': 'outset',
        'double-border': 'double',
        'dotted-border': 'dotted',
        'dashed': 'dashed',
        'hairline': 'solid',
        'inset': 'inset',
        'dash-small': 'dashed',
        'dot-dash': 'dotted',
        'dot-dot-dash': 'dotted',
        'outset': 'outset',
        'tripple': 'double',
        'triple': 'double',
        'thick-thin-small': 'solid',
        'thin-thick-small': 'solid',
        'thin-thick-thin-small': 'solid',
        'thick-thin-medium': 'solid',
        'thin-thick-medium': 'solid',
        'thin-thick-thin-medium': 'solid',
        'thick-thin-large': 'solid',
        'thin-thick-thin-large': 'solid',
        'wavy': 'ridge',
        'double-wavy': 'ridge',
        'striped': 'ridge',
        'emboss': 'inset',
        'engrave': 'inset',
        'frame': 'ridge',
 }
 class InlineClass(etree.XSLTExtension):
@ -71,261 +38,3 @@ class InlineClass(etree.XSLTExtension):
        output_parent.text = ' '.join(classes)
 class RTFInput(InputFormatPlugin):
    '''
    Input plugin for RTF files.

    convert() runs the RTF through rtf2xml, extracts embedded pictures,
    transforms the resulting XML to XHTML with the bundled XSLT stylesheet
    and writes index.xhtml, styles.css and metadata.opf into the current
    working directory.
    '''

    name        = 'RTF Input'
    author      = 'Kovid Goyal'
    description = 'Convert RTF files to HTML'
    file_types  = set(['rtf'])

    def generate_xml(self, stream):
        '''
        Run rtf2xml's ParseRtf and return the XML it produced.

        :param stream: value handed to ParseRtf as ``in_file`` (convert()
                       passes ``stream.name``)
        :return: raw contents of 'dataxml.xml' written by the parser
        '''
        from calibre.ebooks.rtf2xml.ParseRtf import ParseRtf
        ofile = 'dataxml.xml'
        run_lev, debug_dir, indent_out = 1, None, 0
        if getattr(self.opts, 'debug_pipeline', None) is not None:
            # Debug mode is best effort: if the debug directory cannot be
            # created, fall back to the normal run level.
            try:
                os.mkdir('rtfdebug')
                debug_dir = 'rtfdebug'
                run_lev = 4
                indent_out = 1
                self.log('Running RTFParser in debug mode')
            except Exception:
                self.log.warn('Impossible to run RTFParser in debug mode')
        parser = ParseRtf(
            in_file    = stream,
            out_file   = ofile,
            # Convert symbol fonts to unicode equivalents. Default
            # is 1
            convert_symbol = 1,
            # Convert Zapf fonts to unicode equivalents. Default
            # is 1.
            convert_zapf = 1,
            # Convert Wingding fonts to unicode equivalents.
            # Default is 1.
            convert_wingdings = 1,
            # Convert RTF caps to real caps.
            # Default is 1.
            convert_caps = 1,
            # Indent resulting XML.
            # Default is 0 (no indent).
            indent = indent_out,
            # Form lists from RTF. Default is 1.
            form_lists = 1,
            # Convert headings to sections. Default is 0.
            headings_to_sections = 1,
            # Group paragraphs with the same style name. Default is 1.
            group_styles = 1,
            # Group borders. Default is 1.
            group_borders = 1,
            # Write or do not write paragraphs. Default is 0.
            empty_paragraphs = 1,
            #debug
            deb_dir = debug_dir,
            run_level = run_lev,
        )
        parser.parse_rtf()
        with open(ofile, 'rb') as f:
            return f.read()

    def extract_images(self, picts):
        '''
        Decode the hex encoded ``{\\pict ...}`` groups found in the file
        ``picts`` and write each one to the current directory.

        :param picts: path of the picts file to read
        :return: dict mapping the 1-based picture number to the file name of
                 the (possibly converted) image, as returned by
                 convert_images()
        '''
        import imghdr
        self.log('Extracting images...')
        with open(picts, 'rb') as f:
            raw = f.read()
        groups = [p for p in re.findall(r'\{\\pict([^}]+)\}', raw) if p]
        non_hex = re.compile(r'[^a-fA-F0-9]')
        encs = [non_hex.sub('', pict) for pict in groups]
        count = 0
        imap = {}
        for enc in encs:
            # A hex string must have an even number of digits to decode
            if len(enc) % 2 == 1:
                enc = enc[:-1]
            data = enc.decode('hex')
            fmt = imghdr.what(None, data)
            if fmt is None:
                # Anything imghdr does not recognise is treated as WMF
                fmt = 'wmf'
            count += 1
            name = '%04d.%s' % (count, fmt)
            with open(name, 'wb') as f:
                f.write(data)
            imap[count] = name
        return self.convert_images(imap)

    def convert_images(self, imap):
        '''
        Run convert_image() over every value of ``imap`` in place. Entries
        whose conversion fails are logged and left unchanged.

        :return: the same ``imap`` dict
        '''
        self.default_img = None
        for count, val in imap.iteritems():
            try:
                imap[count] = self.convert_image(val)
            except Exception:
                self.log.exception('Failed to convert', val)
        return imap

    def convert_image(self, name):
        '''
        Return the file name to use for image ``name``. Non-WMF files are
        returned unchanged; WMF files are rasterized, or replaced by a
        placeholder image when rasterizing fails.
        '''
        if not name.endswith('.wmf'):
            return name
        try:
            return self.rasterize_wmf(name)
        except Exception:
            self.log.exception('Failed to convert WMF image %r'%name)
        return self.replace_wmf(name)

    def replace_wmf(self, name):
        '''
        Write a placeholder JPEG next to the WMF file ``name`` and return the
        placeholder's file name. The placeholder is generated once and cached
        on ``self.default_img``.
        '''
        from calibre.ebooks import calibre_cover
        if self.default_img is None:
            self.default_img = calibre_cover('Conversion of WMF images is not supported',
            'Use Microsoft Word or OpenOffice to save this RTF file'
            ' as HTML and convert that in calibre.', title_size=36,
            author_size=20)
        name = name.replace('.wmf', '.jpg')
        with open(name, 'wb') as f:
            f.write(self.default_img)
        return name

    def rasterize_wmf(self, name):
        '''
        Pass the contents of the WMF file ``name`` through wmf_unwrap and
        write the result to a file with the extension changed to .png.

        :return: the name of the .png file
        '''
        from calibre.utils.wmf.parse import wmf_unwrap
        with open(name, 'rb') as f:
            data = f.read()
        data = wmf_unwrap(data)
        name = name.replace('.wmf', '.png')
        with open(name, 'wb') as f:
            f.write(data)
        return name

    def write_inline_css(self, ic, border_styles):
        '''
        Append the CSS rules for inline formatting to 'styles.css'.

        :param ic: object exposing ``font_sizes`` and ``colors`` sequences;
                   one ``span.fsN`` / ``span.colN`` rule is written per entry
        :param border_styles: dict mapping class name to CSS declarations,
                              as returned by convert_borders()
        '''
        font_size_classes = ['span.fs%d { font-size: %spt }'%(i, x) for i, x in
                enumerate(ic.font_sizes)]
        color_classes = ['span.col%d { color: %s }'%(i, x) for i, x in
                enumerate(ic.colors)]
        css = textwrap.dedent('''
        span.none {
            text-decoration: none; font-weight: normal;
            font-style: normal; font-variant: normal
        }
        span.italics { font-style: italic }
        span.bold { font-weight: bold }
        span.small-caps { font-variant: small-caps }
        span.underlined { text-decoration: underline }
        span.strike-through { text-decoration: line-through }
        ''')
        css += '\n'+'\n'.join(font_size_classes)
        css += '\n' +'\n'.join(color_classes)
        for cls, val in border_styles.iteritems():
            css += '\n\n.%s {\n%s\n}'%(cls, val)
        with open('styles.css', 'ab') as f:
            f.write(css)

    def convert_borders(self, doc):
        '''
        Assign a ``border_styleN`` class to every ``cell`` element of ``doc``
        based on its border-cell-* attributes.

        Cells with identical computed styles share the same class; N is the
        order in which a distinct style was first seen.

        :return: dict mapping each class name to its CSS declarations
        '''
        style_index = {}  # style text -> index of first occurrence
        style_map = {}
        for elem in doc.xpath(r'//*[local-name()="cell"]'):
            style = ['border-style: hidden', 'border-width: 1px',
                    'border-color: black']
            for x in ('bottom', 'top', 'left', 'right'):
                bs = elem.get('border-cell-%s-style'%x, None)
                if bs:
                    cbs = border_style_map.get(bs, 'solid')
                    style.append('border-%s-style: %s'%(x, cbs))
                bw = elem.get('border-cell-%s-line-width'%x, None)
                if bw:
                    style.append('border-%s-width: %spt'%(x, bw))
                bc = elem.get('border-cell-%s-color'%x, None)
                if bc:
                    style.append('border-%s-color: %s'%(x, bc))
            style = ';\n'.join(style)
            idx = style_index.setdefault(style, len(style_index))
            cls = 'border_style%d'%idx
            style_map[cls] = style
            elem.set('class', cls)
        return style_map

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the RTF file behind ``stream`` to XHTML in the current
        directory.

        :param stream: open file object; ``stream.name`` is handed to the
                       RTF parser and the stream itself is re-read for
                       metadata
        :param options: stored on ``self.opts``; ``debug_pipeline`` is
                        consulted by generate_xml()
        :param log: stored on ``self.log`` and used for all messages
        :return: absolute path of the generated 'metadata.opf'
        :raises ValueError: when rtf2xml raises RtfInvalidCodeException
        '''
        from calibre.ebooks.metadata.meta import get_metadata
        from calibre.ebooks.metadata.opf2 import OPFCreator
        from calibre.ebooks.rtf2xml.ParseRtf import RtfInvalidCodeException
        self.opts = options
        self.log = log
        self.log('Converting RTF to XML...')
        try:
            xml = self.generate_xml(stream.name)
        except RtfInvalidCodeException as e:
            raise ValueError(_('This RTF file has a feature calibre does not '
            'support. Convert it to HTML first and then try it.\n%s')%e)

        # imap must exist even when no picts file was produced, because the
        # rtf:pict loop below looks picture numbers up unconditionally.
        imap = {}
        picts_files = glob.glob(os.path.join('*_rtf_pict_dir', 'picts.rtf'))
        if picts_files:
            try:
                imap = self.extract_images(picts_files[0])
            except Exception:
                self.log.exception('Failed to extract images...')

        self.log('Parsing XML...')
        parser = etree.XMLParser(recover=True, no_network=True)
        doc = etree.fromstring(xml, parser=parser)
        border_styles = self.convert_borders(doc)
        for pict in doc.xpath('//rtf:pict[@num]',
                namespaces={'rtf':'http://rtf2xml.sourceforge.net/'}):
            num = int(pict.get('num'))
            name = imap.get(num, None)
            if name is not None:
                # Replace the picture number with the extracted file name
                pict.set('num', name)

        self.log('Converting XML to HTML...')
        inline_class = InlineClass(self.log)
        styledoc = etree.fromstring(P('templates/rtf.xsl', data=True))
        extensions = { ('calibre', 'inline-class') : inline_class }
        transform = etree.XSLT(styledoc, extensions=extensions)
        result = transform(doc)
        html = 'index.xhtml'
        with open(html, 'wb') as f:
            res = transform.tostring(result)
            # Collapse runs of newlines into a single newline
            res = re.sub('\n+', '\n', res)
            f.write(res)
        self.write_inline_css(inline_class, border_styles)
        stream.seek(0)
        mi = get_metadata(stream, 'rtf')
        if not mi.title:
            mi.title = _('Unknown')
        if not mi.authors:
            mi.authors = [_('Unknown')]
        opf = OPFCreator(os.getcwd(), mi)
        opf.create_manifest([('index.xhtml', None)])
        opf.create_spine(['index.xhtml'])
        # Close the OPF file deterministically instead of leaking the handle
        with open('metadata.opf', 'wb') as opf_file:
            opf.render(opf_file)
        return os.path.abspath('metadata.opf')
 #ebook-convert "bad.rtf" test.epub -v -d "E:\Mes eBooks\Developpement\debug"
 # os.makedirs("E:\\Mes eBooks\\Developpement\\rtfdebug")
 # debug_dir = "E:\\Mes eBooks\\Developpement\\rtfdebug"
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@ -16,7 +16,7 @@ from calibre.ebooks.metadata.opf2 import OPFCreator
 from calibre.ebooks.conversion.preprocess import DocAnalysis
 from calibre.utils.cleantext import clean_ascii_chars
-HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s</title></head><body>\n%s\n</body></html>'
+HTML_TEMPLATE = u'<html><head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"/><title>%s </title></head><body>\n%s\n</body></html>'
 def clean_txt(txt):
    '''
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -102,6 +102,7 @@ gprefs.defaults['cb_fullscreen'] = False
 gprefs.defaults['worker_max_time'] = 0
 gprefs.defaults['show_files_after_save'] = True
 gprefs.defaults['auto_add_path'] = None
 gprefs.defaults['auto_add_check_for_duplicates'] = False
 # }}}
 NONE = QVariant() #: Null value to return from the data function of item models
--- a/src/calibre/gui2/add.py
+++ b/src/calibre/gui2/add.py
@ -382,7 +382,8 @@ class Adder(QObject): # {{{
        if not duplicates:
            return self.duplicates_processed()
        self.pd.hide()
-        files = [x[0].title for x in duplicates]
+        files = [_('%s by %s')%(x[0].title, x[0].format_field('authors')[1])
                for x in duplicates]
        if question_dialog(self._parent, _('Duplicates found!'),
                        _('Books with the same title as the following already '
                        'exist in the database. Add them anyway?'),
--- a/src/calibre/gui2/auto_add.py
+++ b/src/calibre/gui2/auto_add.py
@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import os, tempfile, shutil
+import os, tempfile, shutil, time
 from threading import Thread, Event
 from PyQt4.Qt import (QFileSystemWatcher, QObject, Qt, pyqtSignal, QTimer)
@ -15,6 +15,7 @@ from PyQt4.Qt import (QFileSystemWatcher, QObject, Qt, pyqtSignal, QTimer)
 from calibre import prints
 from calibre.ptempfile import PersistentTemporaryDirectory
 from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.gui2 import question_dialog, gprefs
 class Worker(Thread):
@ -41,25 +42,58 @@ class Worker(Thread):
                traceback.print_exc()
    def auto_add(self):
-        from calibre.utils.ipc.simple_worker import fork_job
+        from calibre.utils.ipc.simple_worker import fork_job, WorkerError
        from calibre.ebooks.metadata.opf2 import metadata_to_opf
        from calibre.ebooks.metadata.meta import metadata_from_filename
-        files = [x for x in os.listdir(self.path) if x not in self.staging
+        files = [x for x in os.listdir(self.path) if
-                and os.path.isfile(os.path.join(self.path, x)) and
+                    # Must not be in the process of being added to the db
-                os.access(os.path.join(self.path, x), os.R_OK|os.W_OK) and
+                    x not in self.staging
-                os.path.splitext(x)[1][1:].lower() in self.be]
+                    # Firefox creates 0 byte placeholder files when downloading
                    and os.stat(os.path.join(self.path, x)).st_size > 0
                    # Must be a file
                    and os.path.isfile(os.path.join(self.path, x))
                    # Must have read and write permissions
                    and os.access(os.path.join(self.path, x), os.R_OK|os.W_OK)
                    # Must be a known ebook file type
                    and os.path.splitext(x)[1][1:].lower() in self.be
                ]
        data = {}
        # Give any in progress copies time to complete
        time.sleep(2)
        for fname in files:
            f = os.path.join(self.path, fname)
            # Try opening the file for reading, if the OS prevents us, then at
            # least on windows, it means the file is open in another
            # application for writing. We will get notified by
            # QFileSystemWatcher when writing is completed, so ignore for now.
            try:
                open(f, 'rb').close()
            except:
                continue
            tdir = tempfile.mkdtemp(dir=self.tdir)
            try:
                fork_job('calibre.ebooks.metadata.meta',
                        'forked_read_metadata', (f, tdir), no_output=True)
            except WorkerError as e:
                prints('Failed to read metadata from:', fname)
                prints(e.orig_tb)
            except:
                import traceback
                traceback.print_exc()
            # Ensure that the pre-metadata file size is present. If it isn't,
            # write 0 so that the file is rescanned
            szpath = os.path.join(tdir, 'size.txt')
            try:
                with open(szpath, 'rb') as f:
                    int(f.read())
            except:
                with open(szpath, 'wb') as f:
                    f.write(b'0')
            opfpath = os.path.join(tdir, 'metadata.opf')
            try:
                if os.stat(opfpath).st_size < 30:
@ -125,25 +159,71 @@ class AutoAdder(QObject):
        m = gui.library_view.model()
        count = 0
        needs_rescan = False
        duplicates = []
        for fname, tdir in data.iteritems():
            paths = [os.path.join(self.worker.path, fname)]
            sz = os.path.join(tdir, 'size.txt')
            try:
                with open(sz, 'rb') as f:
                    sz = int(f.read())
                if sz != os.stat(paths[0]).st_size:
                    raise Exception('Looks like the file was written to after'
                            ' we tried to read metadata')
            except:
                needs_rescan = True
                try:
                    self.worker.staging.remove(fname)
                except KeyError:
                    pass
                continue
            mi = os.path.join(tdir, 'metadata.opf')
            if not os.access(mi, os.R_OK):
                continue
            mi = [OPF(open(mi, 'rb'), tdir,
                    populate_spine=False).to_book_metadata()]
-            m.add_books(paths, [os.path.splitext(fname)[1][1:].upper()], mi,
+            dups, num = m.add_books(paths,
-                    add_duplicates=True)
+                    [os.path.splitext(fname)[1][1:].upper()], mi,
                    add_duplicates=not gprefs['auto_add_check_for_duplicates'])
            if dups:
                path = dups[0][0]
                with open(os.path.join(tdir, 'dup_cache.'+dups[1][0].lower()),
                        'wb') as dest, open(path, 'rb') as src:
                    shutil.copyfileobj(src, dest)
                    dups[0][0] = dest.name
                duplicates.append(dups)
            try:
                os.remove(paths[0])
                self.worker.staging.remove(fname)
            except:
                pass
            count += num
        if duplicates:
            paths, formats, metadata = [], [], []
            for p, f, mis in duplicates:
                paths.extend(p)
                formats.extend(f)
                metadata.extend(mis)
            files = [_('%s by %s')%(mi.title, mi.format_field('authors')[1])
                    for mi in metadata]
            if question_dialog(self.parent(), _('Duplicates found!'),
                        _('Books with the same title as the following already '
                        'exist in the database. Add them anyway?'),
                        '\n'.join(files)):
             dups, num = m.add_books(paths, formats, metadata,
                     add_duplicates=True)
             count += num
        for tdir in data.itervalues():
            try:
                os.remove(os.path.join(self.worker.path, fname))
                try:
                    self.worker.staging.remove(fname)
                except KeyError:
                    pass
                shutil.rmtree(tdir)
            except:
                pass
            count += 1
        if count > 0:
            m.books_added(count)
@ -153,4 +233,7 @@ class AutoAdder(QObject):
            if hasattr(gui, 'db_images'):
                gui.db_images.reset()
        if needs_rescan:
            QTimer.singleShot(2000, self.dir_changed)
--- a/src/calibre/gui2/book_details.py
+++ b/src/calibre/gui2/book_details.py
@ -38,14 +38,24 @@ def render_html(mi, css, vertical, widget, all_fields=False): # {{{
                ans = unicode(col.name())
        return ans
-    f = QFontInfo(QApplication.font(widget)).pixelSize()
+    fi = QFontInfo(QApplication.font(widget))
    f = fi.pixelSize()+1
    fam = unicode(fi.family()).strip().replace('"', '')
    if not fam:
        fam = 'sans-serif'
    c = color_to_string(QApplication.palette().color(QPalette.Normal,
                    QPalette.WindowText))
    templ = u'''\
    <html>
        <head>
        <style type="text/css">
-            body, td {background-color: transparent; font-size: %dpx; color: %s }
+            body, td {
                background-color: transparent;
                font-size: %dpx;
                font-family: "%s",sans-serif;
                color: %s
            }
        </style>
        <style type="text/css">
            %s
@ -55,7 +65,7 @@ def render_html(mi, css, vertical, widget, all_fields=False): # {{{
        %%s
        </body>
    <html>
-    '''%(f, c, css)
+    '''%(f, fam, c, css)
    fm = getattr(mi, 'field_metadata', field_metadata)
    fl = dict(get_field_list(fm))
    show_comments = (all_fields or fl.get('comments', True))
--- a/src/calibre/gui2/catalog/catalog_bibtex.py
+++ b/src/calibre/gui2/catalog/catalog_bibtex.py
@ -31,7 +31,7 @@ class PluginWidget(QWidget, Ui_Form):
        self.setupUi(self)
    def initialize(self, name, db): #not working properly to update
-        from calibre.library.catalog import FIELDS
+        from calibre.library.catalogs import FIELDS
        self.all_fields = [x for x in FIELDS if x != 'all']
        #add custom columns
--- a/src/calibre/gui2/catalog/catalog_csv_xml.py
+++ b/src/calibre/gui2/catalog/catalog_csv_xml.py
@ -21,7 +21,7 @@ class PluginWidget(QWidget, Ui_Form):
    def __init__(self, parent=None):
        QWidget.__init__(self, parent)
        self.setupUi(self)
-        from calibre.library.catalog import FIELDS
+        from calibre.library.catalogs import FIELDS
        self.all_fields = []
        for x in FIELDS:
            if x != 'all':
--- a/src/calibre/gui2/comments_editor.py
+++ b/src/calibre/gui2/comments_editor.py
@ -251,8 +251,12 @@ class EditorWidget(QWebView): # {{{
        def fset(self, val):
            self.setHtml(val)
-            f = QFontInfo(QApplication.font(self)).pixelSize()
+            fi = QFontInfo(QApplication.font(self))
-            style = 'font-size: %dpx;' % (f,)
+            f  = fi.pixelSize()+1
            fam = unicode(fi.family()).strip().replace('"', '')
            if not fam:
                fam = 'sans-serif'
            style = 'font-size: %fpx; font-family:"%s",sans-serif;' % (f, fam)
            # toList() is needed because PyQt on Debian is old/broken
            for body in self.page().mainFrame().documentElement().findAll('body').toList():
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -7,7 +7,8 @@ import os, traceback, Queue, time, cStringIO, re, sys
 from threading import Thread
 from PyQt4.Qt import (QMenu, QAction, QActionGroup, QIcon, SIGNAL,
-                     Qt, pyqtSignal, QDialog, QObject)
+                     Qt, pyqtSignal, QDialog, QObject, QVBoxLayout,
                     QDialogButtonBox)
 from calibre.customize.ui import (available_input_formats, available_output_formats,
    device_plugins)
@ -718,6 +719,31 @@ class DeviceMixin(object): # {{{
    def disconnect_mounted_device(self):
        ''' Unmount the device by delegating to the device manager's
        umount_device(). '''
        self.device_manager.umount_device()
    def configure_connected_device(self):
        '''
        Show a modal dialog wrapping the connected device's configuration
        widget.

        Does nothing when no device is connected. If device jobs are running
        or queued, an error dialog is shown instead and its result is
        returned. When the dialog is accepted, the settings are saved through
        the device and the user is told to disconnect and re-connect the
        device for the changes to apply.
        '''
        if not self.device_manager.is_device_connected:
            return
        if self.job_manager.has_device_jobs(queued_also=True):
            # Refuse to change settings while device jobs are pending
            title = _('Running jobs')
            message = _('Cannot configure the device while there are running'
                        ' device jobs.')
            return error_dialog(self, title, message, show=True)
        device = self.device_manager.connected_device
        config_widget = device.config_widget()
        dialog = QDialog(self)
        dialog.setWindowTitle(_('Configure %s')%device.get_gui_name())
        dialog.setWindowIcon(QIcon(I('config.png')))
        layout = QVBoxLayout(dialog)
        dialog.setLayout(layout)
        buttons = QDialogButtonBox(QDialogButtonBox.Ok|QDialogButtonBox.Cancel)
        buttons.accepted.connect(dialog.accept)
        buttons.rejected.connect(dialog.reject)
        # Configuration widget on top, OK/Cancel buttons below it
        for widget in (config_widget, buttons):
            layout.addWidget(widget)
        if dialog.exec_() != dialog.Accepted:
            return
        device.save_settings(config_widget)
        warning_dialog(self, _('Disconnect device'),
                _('Disconnect and re-connect the %s for your changes to'
                    ' be applied.')%device.get_gui_name(), show=True,
                show_copy_button=False)
    def _sync_action_triggered(self, *args):
        m = getattr(self, '_sync_menu', None)
        if m is not None:
--- a/src/calibre/gui2/device_drivers/configwidget.py
+++ b/src/calibre/gui2/device_drivers/configwidget.py
@ -97,6 +97,7 @@ class ConfigWidget(QWidget, Ui_ConfigWidget):
                l.setWordWrap(True)
                if settings.extra_customization:
                    self.opt_extra_customization.setText(settings.extra_customization)
                self.opt_extra_customization.setCursorPosition(0)
                self.extra_layout.addWidget(l, 0, 0)
                self.extra_layout.addWidget(self.opt_extra_customization, 1, 0)
        self.opt_save_template.setText(settings.save_template)
--- a/src/calibre/gui2/layout.py
+++ b/src/calibre/gui2/layout.py
@ -25,6 +25,7 @@ class LocationManager(QObject): # {{{
    locations_changed = pyqtSignal()
    unmount_device = pyqtSignal()
    location_selected = pyqtSignal(object)
    configure_device = pyqtSignal()
    def __init__(self, parent=None):
        QObject.__init__(self, parent)
@ -57,6 +58,10 @@ class LocationManager(QObject): # {{{
                a = m.addAction(QIcon(I('eject.png')), _('Eject this device'))
                a.triggered.connect(self._eject_requested)
                self._mem.append(a)
                a = m.addAction(QIcon(I('config.png')), _('Configure this device'))
                a.triggered.connect(self._configure_requested)
                self._mem.append(a)
            else:
                ac.setToolTip(tooltip)
            ac.setMenu(m)
@ -109,6 +114,9 @@ class LocationManager(QObject): # {{{
    def _eject_requested(self, *args):
        ''' Slot that emits the unmount_device signal. Any positional
        arguments passed by the caller are accepted and ignored. '''
        self.unmount_device.emit()
    def _configure_requested(self):
        ''' Slot that emits the configure_device signal. '''
        self.configure_device.emit()
    def update_devices(self, cp=(None, None), fs=[-1, -1, -1], icon=None):
        if icon is None:
            icon = I('reader.png')
--- a/src/calibre/gui2/main.py
+++ b/src/calibre/gui2/main.py
@ -138,8 +138,8 @@ class GuiRunner(QObject):
        if self.splash_screen is not None:
            self.splash_screen.finish(main)
        if DEBUG:
-            prints('Started up in', time.time() - self.startup_time, 'with',
+            prints('Started up in %.2f seconds'%(time.time() -
-                    len(db.data), 'books')
+                self.startup_time), 'with', len(db.data), 'books')
        add_filesystem_book = partial(main.iactions['Add Books'].add_filesystem_book, allow_device=False)
        sys.excepthook = main.unhandled_exception
        if len(self.args) > 1:
--- a/src/calibre/gui2/metadata/single_download.py
+++ b/src/calibre/gui2/metadata/single_download.py
@ -323,14 +323,19 @@ class Comments(QWebView): # {{{
                    ans = unicode(col.name())
            return ans
-        f = QFontInfo(QApplication.font(self.parent())).pixelSize()
+        fi = QFontInfo(QApplication.font(self.parent()))
        f = fi.pixelSize()+1
        fam = unicode(fi.family()).strip().replace('"', '')
        if not fam:
            fam = 'sans-serif'
        c = color_to_string(QApplication.palette().color(QPalette.Normal,
                        QPalette.WindowText))
        templ = '''\
        <html>
            <head>
            <style type="text/css">
-                body, td {background-color: transparent; font-size: %dpx; color: %s }
+                body, td {background-color: transparent; font-family: %s; font-size: %dpx; color: %s }
                a { text-decoration: none; color: blue }
                div.description { margin-top: 0; padding-top: 0; text-indent: 0 }
                table { margin-bottom: 0; padding-bottom: 0; }
@ -342,7 +347,7 @@ class Comments(QWebView): # {{{
            </div>
            </body>
        <html>
-        '''%(f, c)
+        '''%(fam, f, c)
        self.setHtml(templ%html)
 # }}}
--- a/src/calibre/gui2/preferences/adding.py
+++ b/src/calibre/gui2/preferences/adding.py
@ -32,6 +32,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        r('automerge', gprefs, choices=choices)
        r('new_book_tags', prefs, setting=CommaSeparatedList)
        r('auto_add_path', gprefs, restart_required=True)
        r('auto_add_check_for_duplicates', gprefs)
        self.filename_pattern = FilenamePattern(self)
        self.metadata_box.layout().insertWidget(0, self.filename_pattern)
--- a/src/calibre/gui2/preferences/adding.ui
+++ b/src/calibre/gui2/preferences/adding.ui
@ -193,6 +193,19 @@ Author matching is exact.</string>
         </property>
        </widget>
       </item>
       <item>
        <widget class="QCheckBox" name="opt_auto_add_check_for_duplicates">
         <property name="toolTip">
          <string>If set, this option will cause calibre to check if a file
 being auto-added is already in the calibre library.
 If it is, a message will pop up asking you whether
 you want to add it anyway.</string>
         </property>
         <property name="text">
          <string>Check for &amp;duplicates when auto-adding files</string>
         </property>
        </widget>
       </item>
       <item>
        <spacer name="verticalSpacer_2">
         <property name="orientation">
--- a/src/calibre/gui2/preferences/main.py
+++ b/src/calibre/gui2/preferences/main.py
@ -171,7 +171,7 @@ class Preferences(QMainWindow):
        self.committed = False
        self.close_after_initial = close_after_initial
-        self.resize(900, 720)
+        self.resize(930, 720)
        nh, nw = min_available_height()-25, available_width()-10
        if nh < 0:
            nh = 800
--- a/src/calibre/gui2/store/search/search.py
+++ b/src/calibre/gui2/store/search/search.py
@ -45,6 +45,7 @@ class SearchDialog(QDialog, Ui_Dialog):
        self.cache_pool = CacheUpdateThreadPool(self.cache_thread_count)
        self.results_view.model().cover_pool.set_thread_count(self.cover_thread_count)
        self.results_view.model().details_pool.set_thread_count(self.details_thread_count)
        self.results_view.setCursor(Qt.PointingHandCursor)
        # Check for results and hung threads.
        self.checker = QTimer()
--- a/src/calibre/gui2/ui.py
+++ b/src/calibre/gui2/ui.py
@ -265,6 +265,7 @@ class Main(MainWindow, MainWindowMixin, DeviceMixin, EmailMixin, # {{{
        ####################### Location Manager ########################
        self.location_manager.location_selected.connect(self.location_selected)
        self.location_manager.unmount_device.connect(self.device_manager.umount_device)
        self.location_manager.configure_device.connect(self.configure_connected_device)
        self.eject_action.triggered.connect(self.device_manager.umount_device)
        #################### Update notification ###################
--- a/src/calibre/gui2/wizard/init.py
+++ b/src/calibre/gui2/wizard/init.py
@ -28,6 +28,7 @@ from calibre.gui2 import min_available_height, available_width
 from calibre.utils.config import dynamic, prefs
 from calibre.gui2 import NONE, choose_dir, error_dialog
 from calibre.gui2.dialogs.progress import ProgressDialog
 from calibre.customize.ui import device_plugins
 # Devices {{{
@ -251,15 +252,39 @@ class Android(Device):
    id = 'android'
    supports_color = True
-class AndroidTablet(Device):
+    @classmethod
    def commit(cls):
        super(Android, cls).commit()
        for plugin in device_plugins(include_disabled=True):
            if plugin.name == 'Android driver':
                plugin.configure_for_generic_epub_app()
 class AndroidTablet(Android):
    # Android tablet entry. Subclasses Android, so commit() is inherited
    # from it rather than defined here.
    name = 'Android tablet'
    output_format = 'EPUB'
    manufacturer = 'Android'
    id = 'android_tablet'
    supports_color = True
    # presumably the name of a conversion output profile -- TODO confirm
    output_profile = 'tablet'
 class AndroidPhoneWithKindle(Android):
    # Android phone entry that uses MOBI output and configures the Android
    # driver for the Kindle app when committed.
    name = 'Android phone with Kindle reader'
    output_format = 'MOBI'
    id = 'android_phone_with_kindle'
    output_profile = 'kindle'
    @classmethod
    def commit(cls):
        # super(Android, cls) starts the method lookup *after* Android, so
        # Android.commit() is bypassed here and only the commit() of
        # Android's base is run.
        # NOTE(review): presumably deliberate, so that only the Kindle
        # configuration below is applied to the driver -- confirm.
        super(Android, cls).commit()
        # Disabled plugins are included in the search for the driver
        for plugin in device_plugins(include_disabled=True):
            if plugin.name == 'Android driver':
                plugin.configure_for_kindle_app()
 class AndroidTabletWithKindle(AndroidPhoneWithKindle):
    # Tablet variant of AndroidPhoneWithKindle: only name, id and
    # output_profile are overridden; everything else, including commit(),
    # is inherited.
    name = 'Android tablet with Kindle reader'
    id = 'android_tablet_with_kindle'
    output_profile = 'kindle_fire'
 class HanlinV3(Device):
    name = 'Hanlin V3'
--- a/src/calibre/library/catalog.py
+++ b/src/calibre/library/catalog.py
--- a/Show More
+++ b/Show More