Merge from trunk

2025-07-09 03:04:10 -04:00 · 2011-04-06 08:10:12 +01:00 · 2011-04-06 08:10:12 +01:00 · b1f788a607
commit b1f788a607
parent 6ed0d245e4 67eb873eab
172 changed files with 37833 additions and 32092 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -19,6 +19,77 @@
 #  new recipes:
 #    - title: 
 - version: 0.7.53
  date: 2011-04-01
  new features:
    - title: "Email delivery: You can now specify a subject that calibre will use when sending emails per email account, configured in Preferences->Sending by email. The subject is a template of the same kind used in Save to Disk, etc. So you can specify the title/authors/series/whatever in the template."
      tickets: [743535]
    - title: "Apple driver: When an iDevice is detected, inform the user about the Connect to iTunes method instead of trying to connect directly to the device, as the latter can be buggy. See http://www.mobileread.com/forums/showthread.php?t=127883 for details"
    - title: "SONY driver: Search for books on the device in all directories not just database/media/books. This can be turned off by customizing the SONY plugin in Preferences->Plugins"
    - title: "EPUB Output: Remove any margins specified via an Adobe page template in the input document. This means that the margins specified in calibre are more likely to be the actual margins used."
    - title: "When reading metadata from filenames, allow publisher and published date to be read from the filename"
      tickets: [744020]
    - title: "Remove the option to show a second tool bar from Preferences->Look & Feel. Instead go to Preferences->Toolbars and add items to the second toolbar to control exactly what is visible there."
      tickets: [742686]
    - title: "Add a tweak that can be used to have the calibre content server listen for IPv6 connections."
      tickets: [743486]
    - title: "When clicking Next or Previous in the edit metadata dialog, the active book in the main book list is also changed"
      tickets: [743533]
    - title: "Remember the previously used setting for Match all/Match any under the Tag Browser when calibre restarts"
      tickets: [743645]
    - title: "FB2 Output: Option to set the FB2 genre explicitly."
      tickets: [743178]
    - title: "Plugin developers: calibre now has a new plugin API, see http://calibre-ebook.com/user_manual/creating_plugins.html. Your existing plugins should continue to work, but it would be good to test them to make sure."
  bug fixes:
    - title: "Fix text color in the search bar set to black instead of the system font color"
      tickets: [746846]
    - title: "Workaround for Word bug where Word uses gb2312 as the encoding when exporting Chinese docs to HTML instead of gbk"
      tickets: [745428]
    - title: "Make sorting on the device view faster and more robust."
      tickets: [742626]
    - title: "E-book viewer: Fix viewer losing place in very long single file documents when window resized."
      tickets: [745001]
    - title: "MOBI Output: Workaround for Amazon's MOBI renderer not rendering top margins on ul and ol tags."
      tickets: [744365]
    - title: "EPUB Input: Workaround for invalid EPUBs produced by someone named 'ibooks, Inc.'."
      tickets: [744122]
    - title: "RTF Input: Handle RTF files with too many levels of list nesting."
      tickets: [743243]
  improved recipes:
    - Irish Times
    - LifeHacker
    - Estadao
    - Folha de Sao Paulo
  new recipes:
    - title: Financieele Dagblad
      author: marvin_2
    - title: "Prost Amerika, WV Hooligan and SB Nation"
      author: rylsfan
    - title: "Cracked.com"
      author: Nudgenudge
 - version: 0.7.52
  date: 2011-03-25
--- a/8
+++ b/8
@ -1,6 +1,9 @@
 calibre supports installation from source, only on Linux. 
-On Windows and OS X use the provided installers and use
+
-the facilities of the calibre-debug command to hack on the calibre source. 
+Note that you *do not* need to install from source to hack on
 the calibre source code. To get started with calibre development,
 use a normal calibre install and follow the instructions at
 http://calibre-ebook.com/user_manual/develop.html
 On Linux, there are two kinds of installation from source possible.
 Note that both kinds require lots of dependencies as well as a
@ -45,3 +48,4 @@ This type of install can be run with the command::
    sudo python setup.py develop
 Use the -h flag for help on the develop command.
--- a/2
+++ b/2
@ -7,7 +7,7 @@ reading. It is cross platform, running on Linux, Windows and OS X.
 For screenshots: https://calibre-ebook.com/demo
 For installation/usage instructions please see
-http://calibre-ebook.com
+http://calibre-ebook.com/user_manual
 For source code access:
 bzr branch lp:calibre
--- a/recipes/developpez.recipe
+++ b/recipes/developpez.recipe
@ -0,0 +1,21 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1301849956(BasicNewsRecipe):
    # Recipe definition for the French developer news site Developpez.com.
    __author__ = 'louhike'

    # Publication metadata.
    title = u'Developpez.com'
    publisher = u'Developpez.com'
    description = u'Toutes les news du site Developpez.com'
    language = 'fr'
    encoding = 'ISO-8859-1'
    timefmt = ' [%a, %d %b, %Y]'

    # Download limits.
    oldest_article = 7
    max_articles_per_feed = 100

    # Page clean-up switches.
    no_stylesheets = True
    remove_javascript = True

    # Tag spec matching <div class="content">.
    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'content'}}]

    # (section title, feed URL) pairs.
    feeds = [
        (u'Tous les articles', u'http://www.developpez.com/index/rss'),
    ]

    def get_cover_url(self):
        """Return the fixed logo URL used as this recipe's cover."""
        logo = 'http://javascript.developpez.com/template/images/logo.gif'
        return logo
--- a/recipes/estadao.recipe
+++ b/recipes/estadao.recipe
@ -1,4 +1,3 @@
 #!/usr/bin/env  python
 from calibre.web.feeds.news import BasicNewsRecipe
 from datetime import datetime, timedelta
 from calibre.ebooks.BeautifulSoup import Tag,BeautifulSoup
@ -6,18 +5,17 @@ from calibre.utils.magick import Image, PixelWand
 from urllib2 import Request, urlopen, URLError
 class Estadao(BasicNewsRecipe):
-    THUMBALIZR_API        = "0123456789abcdef01234567890" # ---->Get your at http://www.thumbalizr.com/
+    THUMBALIZR_API        = '' # ---->Get your at http://www.thumbalizr.com/ and put here
    LANGUAGE              = 'pt_br'
    language = 'pt'
    LANGHTM               = 'pt-br'
    ENCODING              = 'utf'
    ENCHTM                = 'utf-8'
    directionhtm          = 'ltr'
-    requires_version      = (0,8,47)
+    requires_version      = (0,7,47)
    news                  = True
    publication_type      = 'newsportal'
-    title                 = u'Estadao'
+    title                 = u'Estad\xe3o'
    __author__            = 'Euler Alves'
    description           = u'Brazilian news from Estad\xe3o'
    publisher             = u'Estad\xe3o'
@ -33,14 +31,6 @@ class Estadao(BasicNewsRecipe):
    remove_empty_feeds    = True
    timefmt               = ' [%d %b %Y (%a)]'
    html2lrf_options      = [
                            '--comment', description
                            ,'--category', category
                            ,'--publisher', publisher
    ]
    html2epub_options     = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"'
    hoje                  = datetime.now()-timedelta(days=2)
    pubdate               = hoje.strftime('%a, %d %b')
    if hoje.hour<10:
@ -69,6 +59,7 @@ class Estadao(BasicNewsRecipe):
                    ,dict(name='script')
    ]
    feeds = [
    (u'\xDAltimas Not\xEDcias', u'http://www.estadao.com.br/rss/ultimas.xml')
    ,(u'Manchetes', u'http://www.estadao.com.br/rss/manchetes.xml')
@ -109,6 +100,8 @@ class Estadao(BasicNewsRecipe):
            img.open(iurl)
            width, height = img.size
            print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
            if img < 0:
                raise RuntimeError('Out of memory')
            pw = PixelWand()
            if( width > height and width > 590) :
                print 'Rotate image'
@ -117,6 +110,7 @@ class Estadao(BasicNewsRecipe):
        return soup
    def get_cover_url(self):
        if self.THUMBALIZR_API:
            cover_url      = self.CAPA
            pedido         = Request(self.CAPA)
            pedido.add_header('User-agent','Mozilla/5.0 (Windows; U; Windows NT 5.1; '+self.LANGHTM+'; userid='+self.THUMBALIZR_API+') Calibre/0.8.47 (like Gecko)')
@ -132,3 +126,4 @@ class Estadao(BasicNewsRecipe):
            except URLError:
                cover_url='http://api.thumbalizr.com/?api_key='+self.THUMBALIZR_API+'&url='+self.SCREENSHOT+'&width=600&quality=90'
                return cover_url
--- a/recipes/f_secure.recipe
+++ b/recipes/f_secure.recipe
@ -0,0 +1,22 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1301860159(BasicNewsRecipe):
    # Recipe definition for the F-Secure weblog.
    title          = u'F-Secure Weblog'
    # Single language assignment. A second, later `language = 'en_EN'`
    # used to override this one silently; 'en_EN' is not a real locale code.
    language = 'en'
    __author__ = 'louhike'
    description = u'All the news from the weblog of F-Secure'
    publisher = u'F-Secure'
    timefmt = ' [%a, %d %b, %Y]'
    encoding = 'ISO-8859-1'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content   = False
    remove_javascript = True
    # Tag spec matching <div class="modSectionTd2">.
    keep_only_tags = [dict(name='div', attrs={'class':'modSectionTd2'})]
    # Tag specs for every <a> and <hr> element.
    remove_tags = [dict(name='a'),dict(name='hr')]
    # (section title, feed URL) pairs.
    feeds          = [(u'Weblog', u'http://www.f-secure.com/weblog/weblog.rss')]

    def get_cover_url(self):
        """Return the fixed company-logo URL used as this recipe's cover."""
        return 'http://www.f-secure.com/weblog/archives/images/company_logo.png'
--- a/recipes/financieele_dagblad.recipe
+++ b/recipes/financieele_dagblad.recipe
@ -0,0 +1,29 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class fd(BasicNewsRecipe):
    # Recipe definition for the Dutch daily Het Financieele Dagblad.
    title          = u'Het Financieele Dagblad'
    __author__ = 'marvin_2'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    cover_url = 'http://www.fd.nl/static/gfx/logo-fd-164x78.gif'
    language = 'nl'
    # A list of tag specs. Writing `(dict(...))` without a trailing comma
    # yields the bare dict rather than a one-element sequence, so the list
    # form is spelled out explicitly here.
    keep_only_tags = [dict(name = 'div', attrs = {'class': ['headlinearticle']})]
    # Tag spec matching <span class="opties">.
    remove_tags    = [dict(name='span' , attrs={'class':['opties']})]
    # (section title, feed URL) pairs.
    feeds          = [
                        (u'Overzicht',u'http://www.fd.nl/nieuws/overzicht/?view=RSS&profiel=OPENBAAR')
                     ]
    # Stylesheet text supplied to the generated book.
    extra_css = '''
        h1 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:x-large;}
        p{font-family:Arial,Helvetica,sans-serif;}
        strong{font-weight:bold; margin-right:5pt;margin-top:20pt;}
        .datum_ie {font-style:italic;font-size:small;}
        img {align:left;}
        '''
--- a/recipes/folhadesaopaulo.recipe
+++ b/recipes/folhadesaopaulo.recipe
@ -5,16 +5,15 @@ from calibre.utils.magick import Image, PixelWand
 from urllib2 import Request, urlopen, URLError
 class FolhaOnline(BasicNewsRecipe):
-    THUMBALIZR_API        = "0123456789abcdef01234567890" # ---->Get your at http://www.thumbalizr.com/
+    THUMBALIZR_API        = '' # ---->Get your at http://www.thumbalizr.com/ and put here
    LANGUAGE              = 'pt_br'
    language = 'pt'
    LANGHTM               = 'pt-br'
    ENCODING              = 'cp1252'
    ENCHTM                = 'iso-8859-1'
    directionhtm          = 'ltr'
-    requires_version      = (0,8,47)
+    requires_version      = (0,7,47)
    news                  = True
    publication_type      = 'newsportal'
    title                 = u'Folha de S\xE3o Paulo'
    __author__            = 'Euler Alves'
@ -96,6 +95,7 @@ class FolhaOnline(BasicNewsRecipe):
    ,(u'Valdo Cruz', u'http://http://feeds.folha.uol.com.br/folha/pensata/valdocruz/rss091.xml')
    ]
    conversion_options = {
    'title'            : title
    ,'comments'        : description
@ -124,6 +124,8 @@ class FolhaOnline(BasicNewsRecipe):
            img.open(iurl)
            width, height = img.size
            print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
            if img < 0:
                raise RuntimeError('Out of memory')
            pw = PixelWand()
            if( width > height and width > 590) :
                print 'Rotate image'
--- a/recipes/globe_and_mail.recipe
+++ b/recipes/globe_and_mail.recipe
@ -35,8 +35,8 @@ class AdvancedUserRecipe1287083651(BasicNewsRecipe):
      (u'Arts', u'http://www.theglobeandmail.com/news/arts/?service=rss'),
      (u'Life', u'http://www.theglobeandmail.com/life/?service=rss'),
      (u'Real Estate', u'http://www.theglobeandmail.com/real-estate/?service=rss'),
-      (u'Auto', u'http://www.theglobeandmail.com/sports/?service=rss'),
+      (u'Sports', u'http://www.theglobeandmail.com/sports/?service=rss'),
-      (u'Sports', u'http://www.theglobeandmail.com/auto/?service=rss')
+      (u'Drive', u'http://www.theglobeandmail.com/auto/?service=rss')
    ]
    preprocess_regexps = [
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@ -36,6 +36,7 @@ class Guardian(BasicNewsRecipe):
    remove_tags = [
                        dict(name='div', attrs={'class':["video-content","videos-third-column"]}),
                        dict(name='div', attrs={'id':["article-toolbox","subscribe-feeds",]}),
                        dict(name='div', attrs={'class':["guardian-tickets promo-component",]}),
                        dict(name='ul', attrs={'class':["pagination"]}),
                        dict(name='ul', attrs={'id':["content-actions"]}),
                        #dict(name='img'),
--- a/recipes/hawaii.recipe
+++ b/recipes/hawaii.recipe
@ -2,7 +2,7 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    title = 'West Hawaii Today'
-    __author__ = 'Tony Stegall'
+    __author__ = 'Tony Stegall, fixed by HK'
    language = 'en'
    description = 'Westhawaiitoday.com'
    publisher = 'West Hawaii '
@ -15,7 +15,14 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    masthead_url = 'http://images.townnews.com/westhawaiitoday.com/art/whttoplogo.gif'
-
+    feeds          = [
-    feeds          = [ 'http://www.westhawaiitoday.com/rss.xml']
+                       ('http://www.westhawaiitoday.com/taxonomy/term/2/feed'),  #Local News
-
+                       ('http://www.westhawaiitoday.com/taxonomy/term/15/feed'), #Local Sports
                       ('http://www.westhawaiitoday.com/taxonomy/term/4/feed'),   #Local Features
                       ('http://www.westhawaiitoday.com/taxonomy/term/12/feed'), #Obituaries
                       ('http://www.westhawaiitoday.com/taxonomy/term/18/feed'), #Letters
                       ('http://www.westhawaiitoday.com/taxonomy/term/19/feed'), #Editorial
                       ('http://www.westhawaiitoday.com/taxonomy/term/20/feed'), #columns
                       ('http://www.westhawaiitoday.com/taxonomy/term/13/feed') #Volcano Update (Sundays)
                    ]
--- a/recipes/icons/estadao.png
+++ b/recipes/icons/estadao.png
--- a/recipes/icons/folhadesaopaulo.png
+++ b/recipes/icons/folhadesaopaulo.png
--- a/recipes/icons/prostamerika.png
+++ b/recipes/icons/prostamerika.png
--- a/recipes/icons/sb_nation.png
+++ b/recipes/icons/sb_nation.png
--- a/recipes/icons/wvhooligan.png
+++ b/recipes/icons/wvhooligan.png
--- a/recipes/idnes.recipe
+++ b/recipes/idnes.recipe
@ -34,7 +34,7 @@ class iHeuteRecipe(BasicNewsRecipe):
                   dict(name='table', attrs={'class':['video-16ku9']})]
    remove_tags_after  = [dict(name='div',attrs={'id':['related','related2']})]
-    keep_only_tags = [dict(name='div', attrs={'class':['art-full adwords-text','dil-day']})
+    keep_only_tags = [dict(name='div', attrs={'class':['art-full adwords-text','dil-day','art-full']})
                      ,dict(name='table',attrs={'class':['kemel-box']})]
    def print_version(self, url):
--- a/recipes/iht.recipe
+++ b/recipes/iht.recipe
@ -15,10 +15,10 @@ class InternationalHeraldTribune(BasicNewsRecipe):
    language = 'en'
    oldest_article = 1
-    max_articles_per_feed = 10
+    max_articles_per_feed = 30
    no_stylesheets = True
-    remove_tags    = [dict(name='div', attrs={'class':'footer'}),
+    remove_tags    = [dict(name='div', attrs={'class':['footer','header']}),
                      dict(name=['form'])]
    preprocess_regexps = [
            (re.compile(r'<!-- webtrends.*', re.DOTALL),
@ -26,6 +26,8 @@ class InternationalHeraldTribune(BasicNewsRecipe):
                          ]
    extra_css      = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt  }'
    remove_empty_feeds = True
    feeds          = [
                      (u'Frontpage', u'http://www.iht.com/rss/frontpage.xml'),
                      (u'Business', u'http://www.iht.com/rss/business.xml'),
@ -47,12 +49,14 @@ class InternationalHeraldTribune(BasicNewsRecipe):
    temp_files = []
    articles_are_obfuscated = True
-    def get_obfuscated_article(self, url, logger):
+    masthead_url = 'http://graphics8.nytimes.com/images/misc/iht-masthead-logo.gif'
    def get_obfuscated_article(self, url):
        br = self.get_browser()
        br.open(url)
-        br.select_form(name='printFriendly')
+        response1 = br.follow_link(url_regex=re.compile(r'.*pagewanted=print.*'))
-        res = br.submit()
+        html = response1.read()
-        html = res.read()
+        
        self.temp_files.append(PersistentTemporaryFile('_iht.html'))
        self.temp_files[-1].write(html)
        self.temp_files[-1].close()
--- a/recipes/irish_times.recipe
+++ b/recipes/irish_times.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan"
+__copyright__ = "2008, Derry FitzGerald. 2009 Modified by Ray Kinsella and David O'Callaghan, 2011 Modified by Phil Burns"
 '''
 irishtimes.com
 '''
@ -9,17 +9,20 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class IrishTimes(BasicNewsRecipe):
    title          = u'The Irish Times'
-    __author__     = "Derry FitzGerald, Ray Kinsella and David O'Callaghan"
+    encoding  = 'ISO-8859-15'
    __author__     = "Derry FitzGerald, Ray Kinsella, David O'Callaghan and Phil Burns"
    language = 'en_IE'
    timefmt = ' (%A, %B %d, %Y)'
-    oldest_article = 3
+
    oldest_article = 1.0
    max_articles_per_feed  = 100
    no_stylesheets = True
-    simultaneous_downloads= 1
+    simultaneous_downloads= 5
    r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
    remove_tags    = [dict(name='div', attrs={'class':'footer'})]
-    extra_css      = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt  }'
+    extra_css      = 'p, div { margin: 0pt; border: 0pt; text-indent: 0.5em } .headline {font-size: large;} \n .fact { padding-top: 10pt  }'
    feeds          = [
                      ('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
@ -30,15 +33,29 @@ class IrishTimes(BasicNewsRecipe):
                      ('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
                      ('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
                      ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
                      ('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
                      ('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
                      ('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
                      ('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
                      ('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
                      ('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
                      ('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
                      ('Property', 'http://www.irishtimes.com/feeds/rss/newspaper/property.rss'),
                      ('The Tickets', 'http://www.irishtimes.com/feeds/rss/newspaper/theticket.rss'),
                      ('Weekend', 'http://www.irishtimes.com/feeds/rss/newspaper/weekend.rss'),
                      ('News features', 'http://www.irishtimes.com/feeds/rss/newspaper/newsfeatures.rss'),
                      ('Obituaries', 'http://www.irishtimes.com/feeds/rss/newspaper/obituaries.rss'),
                    ]
    def print_version(self, url):
        if url.count('rss.feedsportal.com'):
-            u = 'http://www.irishtimes.com' + \
+            u = url.replace('0Bhtml/story01.htm','_pf0Bhtml/story01.htm')
                     (((url[70:].replace('0C','/')).replace('0A','0'))).replace('0Bhtml/story01.htm','_pf.html')
        else:
            u = url.replace('.html','_pf.html')
        return u
    def get_article_url(self, article):
        return article.link
--- a/recipes/lifehacker.recipe
+++ b/recipes/lifehacker.recipe
@ -1,37 +1,100 @@
 __license__   = 'GPL v3'
 __copyright__ = '2010, NA'
 '''
 lifehacker.com
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 from datetime import datetime
 from calibre.ebooks.BeautifulSoup import Tag
 from calibre.utils.magick import Image, PixelWand
-class Lifehacker(BasicNewsRecipe):
+class LifeHacker(BasicNewsRecipe):
-    title                 = 'Lifehacker'
+    THUMBALIZR_API        = '' # ---->Get your at http://www.thumbalizr.com/ and put here
-    __author__            = 'Kovid Goyal'
+    LANGUAGE              = 'en'
-    description           = "Computers make us more productive. Yeah, right. Lifehacker recommends the software downloads and web sites that actually save time. Don't live to geek; geek to live."
+    LANGHTM               = 'en'
    publisher             = 'lifehacker.com'
    category              = 'news, IT, Internet, gadgets, tips and tricks, howto, diy'
    oldest_article        = 2
    max_articles_per_feed = 100
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = True
    language = 'en'
    ENCODING              = 'utf'
    ENCHTM                = 'utf-8'
    requires_version      = (0,7,47)
    news                  = True
    title                 = u'LifeHacker'
    __author__            = 'Euler Alves'
    description           = u'Tips, tricks, and downloads for getting things done.'
    publisher             = u'lifehacker.com'
    author                = u'Adam Pash & Kevin Purdy & Adam Dachis & Whitson Gordon & Gina Trapani'
    category              = 'news, rss'
    oldest_article        = 4
    max_articles_per_feed = 20
    summary_length        = 1000
    remove_javascript     = True
    no_stylesheets        = True
    use_embedded_content  = True
    remove_empty_feeds    = True
    timefmt               = ' [%d %b %Y (%a)]'
    hoje                  = datetime.now()
    pubdate               = hoje.strftime('%a, %d %b')
    cover_url             = 'http://api.thumbalizr.com/?api_key='+THUMBALIZR_API+'&url=http://lifehacker.com&width=600&quality=90'
    cover_margins         = (0,0,'white')
    masthead_url          = 'http://cache.gawkerassets.com/assets/lifehacker.com/img/logo.png'
    conversion_options = {
                          'comment'   : description
                        , 'tags'      : category
                        , 'publisher' : publisher
                        , 'language'  : language
                        }
    remove_tags = [
                    {'class': 'feedflare'},
                    dict(name='div',
                        attrs={'class':[
                            'ad_container'
                            ,'ad_300x250'
                            ,'ad_interstitial'
                            ,'share-wrap'
                            ,'ad_300x600'
                            ,'ad_perma-footer-adsense'
                            ,'ad_perma-panorama'
                            ,'ad panorama'
                            ,'ad_container'
                        ]})
                    ,dict(name='div',
                        attrs={'id':[
                            'agegate_container'
                            ,'agegate_container_rejected'
                            ,'sharemenu-wrap'
                        ]})
    ]
    feeds = [(u'Articles', u'http://feeds.gawker.com/lifehacker/vip?format=xml')]
-    def preprocess_html(self, soup):
+    conversion_options = {
-        return self.adeify_images(soup)
+    'title'            : title
    ,'comments'        : description
    ,'publisher'       : publisher
    ,'tags'            : category
    ,'language'        : LANGUAGE
    ,'linearize_tables': True
    }
    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
        if not soup.find(attrs={'http-equiv':'Content-Language'}):
            meta0 = Tag(soup,'meta',[("http-equiv","Content-Language"),("content",self.LANGHTM)])
            soup.head.insert(0,meta0)
        if not soup.find(attrs={'http-equiv':'Content-Type'}):
            meta1 = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset="+self.ENCHTM)])
            soup.head.insert(0,meta1)
        return soup
    def postprocess_html(self, soup, first):
        #process all the images. assumes that the new html has the correct path
        for tag in soup.findAll(lambda tag: tag.name.lower()=='img' and tag.has_key('src')):
            iurl = tag['src']
            img = Image()
            img.open(iurl)
            width, height = img.size
            print 'img is: ', iurl, 'width is: ', width, 'height is: ', height
            if img < 0:
                raise RuntimeError('Out of memory')
            pw = PixelWand()
            if( width > height and width > 590) :
                print 'Rotate image'
                img.rotate(pw, -90)
                img.save(iurl)
        return soup
--- a/recipes/perfil.recipe
+++ b/recipes/perfil.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 perfil.com
 '''
@ -39,9 +39,9 @@ class Perfil(BasicNewsRecipe):
                      dict(name=['iframe','embed','object','base','meta','link'])
                     ,dict(name='a', attrs={'href':'#comentarios'})
                     ,dict(name='div', attrs={'class':'foto3'})
-                     ,dict(name='img', attrs={'alt':'ampliar'})
+                     ,dict(name='img', attrs={'alt':['ampliar','Ampliar']})
                    ]
-    keep_only_tags=[dict(attrs={'class':['bd468a','cuerpoSuperior']})]
+    keep_only_tags=[dict(attrs={'class':['articulo','cuerpoSuperior']})]
    remove_attributes=['onload','lang','width','height','border']
    feeds = [
--- a/recipes/prostamerika.recipe
+++ b/recipes/prostamerika.recipe
@ -0,0 +1,68 @@
 #!/usr/bin/env python
 # encoding: utf-8
 __license__ = 'GPL 3'
 __copyright__ = 'zotzo'
 """
 http://www.prostamerika.com/
 """
 from calibre.web.feeds.news import BasicNewsRecipe
 class ProstAmerika(BasicNewsRecipe):
    # Recipe definition for the Prost Amerika soccer site.
    __author__ = 'rylsfan'
    #authors =

    # Publication metadata.
    title = 'Prost Amerika'
    publisher = 'ProstAmerika' # 4464 fremont avenue n, # 209, Seattle, 98103, United States
    description = 'Seattle soccer with a European accent. News, features, and match reports.'
    category = 'Sports'
    language = 'en'
    encoding = 'utf-8'

    # Artwork.
    cover_url = 'http://img17.imageshack.us/img17/9498/prostamerika.jpg'
    masthead_url = 'http://www.prostamerika.com/soundersfc/wp-content/uploads/2011/02/PASoccer_taglinewhole.jpg'

    # Download limits and page clean-up switches.
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False

    # (section title, feed URL) pairs.
    feeds = [
        (u'Cascadia', u'http://www.prostamerika.com/category/localfootball/feed/'),
        (u'MLS', u'http://www.prostamerika.com/category/mls/feed/'),
        (u'EPL', u'http://www.prostamerika.com/category/epl/feed/'),
        (u'World', u'http://www.prostamerika.com/category/international-soccer/feed/'),
        (u'Fan Culture', u'http://www.prostamerika.com/category/fan-culture/feed/'),
    ]

    # Tag spec matching <div id="maincontent">.
    keep_only_tags = [{'name': 'div', 'attrs': {'id': 'maincontent'}}]

    # Tag specs selected by CSS class.
    remove_tags = [
        {'class': 'tweetmeme_button'},
        {'class': 'wp-caption-text'},
    ]
    remove_tags_after = [{'class': 'tweetmeme_button'}]

    # Stylesheet text supplied to the generated book.
    extra_css = '''
               h1{font-family:Didot,Helvetica,sans-serif; font-weight:bold;font-size:large;}
               h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
               p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
               body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
                '''

    def preprocess_html(self, soup):
        """Pass the parsed page through adeify_images() and return the result."""
        processed = self.adeify_images(soup)
        return processed
--- a/recipes/sb_nation.recipe
+++ b/recipes/sb_nation.recipe
@ -0,0 +1,56 @@
 #!/usr/bin/env  python
 __license__   = 'GPL v3'
 __copyright__ = 'Zotzo'
 '''
 http://www.stumptownfooty.com/
 http://www.eightysixforever.com
 http://www.sounderatheart.com
 http://www.dailysoccerfix.com/
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 class SBNation(BasicNewsRecipe):
    # Recipe definition for a handful of SB Nation soccer blogs.
    title    = u'SBNation'
    __author__ = 'rylsfan'
    description           = u"More than 290 individual communities, each offering high quality year-round coverage and conversation led by fans who are passionate."
    oldest_article        = 3
    language = 'en'
    max_articles_per_feed = 100
    no_stylesheets        = True
    use_embedded_content  = False
    #cover_url = 'http://img132.imageshack.us/img132/4913/2hyggjegqqdywzn9.png'
    # Tag specs: <h2 class="title"> and <div class="entry-body">.
    keep_only_tags = [
                        dict(name='h2', attrs={'class':'title'})
                       ,dict(name='div', attrs={'class':'entry-body'})
                     ]
    remove_tags_after = dict(name='div', attrs={'class':'footline entry-actions'})
    remove_tags = [
                     dict(name='div', attrs={'class':'footline entry-actions'}),
                     {'class': 'extend-divide'}
                  ]
    # SBNation has 300 special blogs to choose from. These are just a couple!
    feeds       =  [
                       (u'Daily Fix', u'http://www.dailysoccerfix.com/rss/'),
                       (u"Stumptown Footy", u'http://www.stumptownfooty.com/rss/'),
                       (u'Sounders', u'http://www.sounderatheart.com/rss/'),
                       (u'Whitecaps', u'http://www.eightysixforever.com/rss/'),
                   ]
    # Stylesheet text supplied to the generated book.
    extra_css  =   """
                    h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
                    p{font-family:Helvetica,sans-serif; display: block; text-align: left; text-decoration: none; text-indent: 0%;}
                    body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
                         """

    def preprocess_html(self, soup):
        """Pass the parsed page through adeify_images() and return the result."""
        return self.adeify_images(soup)

    def populate_article_metadata(self, article, soup, first):
        """Append a "By <author>" line after the first <h2> of the page.

        The first <h2> found in ``soup`` is replaced by its own markup
        followed by a byline built from ``article.author``. Nothing is
        changed when the page has no <h2> or the article has no author;
        either case previously raised an exception.
        """
        h2 = soup.find('h2')
        author = article.author
        if h2 is None or not author:
            # No heading to anchor the byline to, or nothing to put in it.
            return
        h2.replaceWith(h2.prettify() + '<p><em> By ' + author + '</em></p>')
--- a/recipes/smith.recipe
+++ b/recipes/smith.recipe
@ -7,6 +7,7 @@ class SmithsonianMagazine(BasicNewsRecipe):
    __author__     = 'Krittika Goyal'
    oldest_article = 31#days
    max_articles_per_feed = 50
    use_embedded_content = False
    #encoding = 'latin1'
    recursions = 1
    match_regexps = ['&page=[2-9]$']
--- a/recipes/toi.recipe
+++ b/recipes/toi.recipe
@ -1,3 +1,4 @@
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class TimesOfIndia(BasicNewsRecipe):
@ -8,10 +9,10 @@ class TimesOfIndia(BasicNewsRecipe):
    max_articles_per_feed = 25
    no_stylesheets = True
-    keep_only_tags = [dict(attrs={'class':'maintable12'})]
+    keep_only_tags = [{'class':['maintable12', 'prttabl']}]
    remove_tags = [
            dict(style=lambda x: x and 'float' in x),
-            dict(attrs={'class':'prvnxtbg'}),
+            {'class':['prvnxtbg', 'footbdrin', 'bcclftr']},
    ]
    feeds          = [
@ -38,8 +39,28 @@ class TimesOfIndia(BasicNewsRecipe):
 ('Most Read',
 'http://timesofindia.indiatimes.com/rssfeedmostread.cms')
 ]
-    def print_version(self, url):
+
-        return url + '?prtpage=1'
+    def get_article_url(self, article):
        url = BasicNewsRecipe.get_article_url(self, article)
        if '/0Ltimesofindia' in url:
            url = url.partition('/0L')[-1]
            url = url.replace('0B', '.').replace('0N', '.com').replace('0C',
                    '/').replace('0E', '-')
            url = 'http://' + url.rpartition('/')[0]
            match = re.search(r'/([0-9a-zA-Z]+?)\.cms', url)
            if match is not None:
                num = match.group(1)
                num = re.sub(r'[^0-9]', '', num)
                return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
                    num)
        else:
            cms = re.search(r'/(\d+)\.cms', url)
            if cms is not None:
                return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
                    cms.group(1))
        return url
    def preprocess_html(self, soup):
        return soup
--- a/recipes/wvhooligan.recipe
+++ b/recipes/wvhooligan.recipe
@ -0,0 +1,61 @@
 #!/usr/bin/env  python
 __license__ = 'GPL 3'
 __copyright__ = 'zotzo'
 __docformat__ = 'restructuredtext en'
 '''
 http://wvhooligan.com/
 '''
 from calibre.web.feeds.news import BasicNewsRecipe
 #import re
 class wvHooligan(BasicNewsRecipe):
    # Identification of the recipe and of the blog it downloads.
    title = u'WV Hooligan'
    __author__ = 'rylsfan'
    version = 2
    authors = u'Drew Epperley'
    publisher = u'Drew Epperley'
    publication_type = 'Blog'
    category = u'Soccer'
    language = 'en'
    description = u'A look at Major League Soccer (MLS) through the eyes of a MLS writer and fan.'
    cover_url = 'http://wvhooligan.com/wp-content/themes/urbanelements/images/logo3.png'

    # Download limits and content handling switches.
    oldest_article = 15
    max_articles_per_feed = 150
    use_embedded_content = True
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf8'

    # Built from the attributes above, so it has to come after them.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Elements dropped from every article, selected by class.
    remove_tags = [{'class': 'feedflare'}, {'class': 'tweetmeme_button'}]

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Stories', u'http://feeds2.feedburner.com/wvhooligan'),
        (u'MLS', u'http://wvhooligan.com/category/mls/feed/'),
        (u'MLS Power Rankings', u'http://wvhooligan.com/category/power-rankings/feed/'),
        (u'MLS Expansion', u'http://wvhooligan.com/category/mls/expansion-talk/feed/'),
        (u'US National Team', u'http://wvhooligan.com/category/us-national-team/feed/'),
        (u'College', u'http://wvhooligan.com/category/college-soccer/feed/'),
    ]

    extra_css = '''
               h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
               h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
               p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
               body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
               '''

    def preprocess_html(self, soup):
        '''
        Pass the parsed article through adeify_images and return the result.
        '''
        processed = self.adeify_images(soup)
        return processed
--- a/resources/fonts/liberation/LiberationMono-Bold.ttf
+++ b/resources/fonts/liberation/LiberationMono-Bold.ttf
--- a/resources/fonts/liberation/LiberationMono-BoldItalic.ttf
+++ b/resources/fonts/liberation/LiberationMono-BoldItalic.ttf
--- a/resources/fonts/liberation/LiberationMono-Italic.ttf
+++ b/resources/fonts/liberation/LiberationMono-Italic.ttf
--- a/resources/fonts/liberation/LiberationMono-Regular.ttf
+++ b/resources/fonts/liberation/LiberationMono-Regular.ttf
--- a/resources/fonts/liberation/LiberationSans-Bold.ttf
+++ b/resources/fonts/liberation/LiberationSans-Bold.ttf
--- a/resources/fonts/liberation/LiberationSans-BoldItalic.ttf
+++ b/resources/fonts/liberation/LiberationSans-BoldItalic.ttf
--- a/resources/fonts/liberation/LiberationSans-Italic.ttf
+++ b/resources/fonts/liberation/LiberationSans-Italic.ttf
--- a/resources/fonts/liberation/LiberationSans-Regular.ttf
+++ b/resources/fonts/liberation/LiberationSans-Regular.ttf
--- a/resources/fonts/liberation/LiberationSerif-Bold.ttf
+++ b/resources/fonts/liberation/LiberationSerif-Bold.ttf
--- a/resources/fonts/liberation/LiberationSerif-BoldItalic.ttf
+++ b/resources/fonts/liberation/LiberationSerif-BoldItalic.ttf
--- a/resources/fonts/liberation/LiberationSerif-Italic.ttf
+++ b/resources/fonts/liberation/LiberationSerif-Italic.ttf
--- a/resources/fonts/liberation/LiberationSerif-Regular.ttf
+++ b/resources/fonts/liberation/LiberationSerif-Regular.ttf
--- a/setup/publish.py
+++ b/setup/publish.py
@ -45,7 +45,6 @@ class Stage3(Command):
   sub_commands = ['upload_user_manual', 'upload_demo', 'sdist',
            'upload_to_sourceforge', 'upload_to_google_code',
            'tag_release', 'upload_to_server',
            'upload_to_mobileread',
   ]
 class Stage4(Command):
--- a/setup/upload.py
+++ b/setup/upload.py
@ -5,7 +5,8 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import os, re, cStringIO, base64, httplib, subprocess, hashlib, shutil, time, glob
+import os, re, cStringIO, base64, httplib, subprocess, hashlib, shutil, time, \
    glob, stat
 from subprocess import check_call
 from tempfile import NamedTemporaryFile, mkdtemp
 from zipfile import ZipFile
@ -344,6 +345,8 @@ class UploadUserManual(Command): # {{{
    def build_plugin_example(self, path):
        from calibre import CurrentDir
        with NamedTemporaryFile(suffix='.zip') as f:
            os.fchmod(f.fileno(),
                stat.S_IRUSR|stat.S_IRGRP|stat.S_IROTH|stat.S_IWRITE)
            with CurrentDir(self.d(path)):
                with ZipFile(f, 'w') as zf:
                    for x in os.listdir('.'):
@ -352,8 +355,8 @@ class UploadUserManual(Command): # {{{
                            for y in os.listdir(x):
                                zf.write(os.path.join(x, y))
            bname = self.b(path) + '_plugin.zip'
-            subprocess.check_call(['scp', f.name, 'divok:%s/%s'%(DOWNLOADS,
+            dest = '%s/%s'%(DOWNLOADS, bname)
-                bname)])
+            subprocess.check_call(['scp', f.name, 'divok:'+dest])
    def run(self, opts):
        path = self.j(self.SRC, 'calibre', 'manual', 'plugin_examples')
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -61,6 +61,9 @@ def osx_version():
        if m:
            return int(m.group(1)), int(m.group(2)), int(m.group(3))
 def confirm_config_name(name):
    '''
    Return ``name`` with the suffix ``'_again'`` appended.
    '''
    suffix = '_again'
    return name + suffix
 _filename_sanitize = re.compile(r'[\xae\0\\|\?\*<":>\+/]')
 _filename_sanitize_unicode = frozenset([u'\\', u'|', u'?', u'*', u'<',
    u'"', u':', u'>', u'+', u'/'] + list(map(unichr, xrange(32))))
@ -278,16 +281,17 @@ def get_parsed_proxy(typ='http', debug=True):
 def random_user_agent():
    choices = [
-        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)',
+        'Mozilla/5.0 (Windows NT 5.2; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19',
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en; rv:1.8.1.14) Gecko/20080409 Camino/1.6 (like Firefox/2.0.0.14)',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.0.1) Gecko/20060118 Camino/1.0b2+',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.78 Safari/532.5',
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
    ]
    #return choices[-1]
    return choices[random.randint(0, len(choices)-1)]
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.7.52'
+__version__   = '0.7.53'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 import re, importlib
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -231,6 +231,17 @@ class HTMLMetadataReader(MetadataReaderPlugin):
        from calibre.ebooks.metadata.html import get_metadata
        return get_metadata(stream)
 class HTMLZMetadataReader(MetadataReaderPlugin):
    # Metadata reader plugin registered for the 'htmlz' file type.
    name = 'Read HTMLZ metadata'
    author = 'John Schember'
    description = _('Read metadata from %s files') % 'HTMLZ'
    file_types = set(('htmlz',))

    def get_metadata(self, stream, ftype):
        '''
        Read the metadata from ``stream`` through the extz helper and return
        it. ``ftype`` is accepted but not used here.
        '''
        # Imported at call time, as before, so loading this module does not
        # pull in the extz code.
        from calibre.ebooks.metadata import extz
        return extz.get_metadata(stream)
 class IMPMetadataReader(MetadataReaderPlugin):
    name        = 'Read IMP metadata'
@ -407,7 +418,7 @@ class TXTZMetadataReader(MetadataReaderPlugin):
    author      = 'John Schember'
    def get_metadata(self, stream, ftype):
-        from calibre.ebooks.metadata.txtz import get_metadata
+        from calibre.ebooks.metadata.extz import get_metadata
        return get_metadata(stream)
 class ZipMetadataReader(MetadataReaderPlugin):
@ -433,6 +444,17 @@ class EPUBMetadataWriter(MetadataWriterPlugin):
        from calibre.ebooks.metadata.epub import set_metadata
        set_metadata(stream, mi, apply_null=self.apply_null)
 class HTMLZMetadataWriter(MetadataWriterPlugin):
    # Metadata writer plugin registered for the 'htmlz' file type.
    name = 'Set HTMLZ metadata'
    author = 'John Schember'
    # NOTE(review): the wording says 'from' although this plugin writes
    # metadata; kept verbatim because the string goes through _() - confirm
    # before changing it.
    description = _('Set metadata from %s files') % 'HTMLZ'
    file_types = set(('htmlz',))

    def set_metadata(self, stream, mi, type):
        '''
        Write the metadata ``mi`` into ``stream`` through the extz helper.
        ``type`` is accepted but not used here.
        '''
        # Imported at call time, as before, so loading this module does not
        # pull in the extz code.
        from calibre.ebooks.metadata import extz
        extz.set_metadata(stream, mi)
 class LRFMetadataWriter(MetadataWriterPlugin):
    name = 'Set LRF metadata'
@ -505,7 +527,7 @@ class TXTZMetadataWriter(MetadataWriterPlugin):
    author      = 'John Schember'
    def set_metadata(self, stream, mi, type):
-        from calibre.ebooks.metadata.txtz import set_metadata
+        from calibre.ebooks.metadata.extz import set_metadata
        set_metadata(stream, mi)
 # }}}
@ -514,6 +536,7 @@ from calibre.ebooks.comic.input import ComicInput
 from calibre.ebooks.epub.input import EPUBInput
 from calibre.ebooks.fb2.input import FB2Input
 from calibre.ebooks.html.input import HTMLInput
 from calibre.ebooks.htmlz.input import HTMLZInput
 from calibre.ebooks.lit.input import LITInput
 from calibre.ebooks.mobi.input import MOBIInput
 from calibre.ebooks.odt.input import ODTInput
@ -544,6 +567,7 @@ from calibre.ebooks.tcr.output import TCROutput
 from calibre.ebooks.txt.output import TXTOutput
 from calibre.ebooks.txt.output import TXTZOutput
 from calibre.ebooks.html.output import HTMLOutput
 from calibre.ebooks.htmlz.output import HTMLZOutput
 from calibre.ebooks.snb.output import SNBOutput
 from calibre.customize.profiles import input_profiles, output_profiles
@ -599,6 +623,7 @@ plugins += [
    EPUBInput,
    FB2Input,
    HTMLInput,
    HTMLZInput,
    LITInput,
    MOBIInput,
    ODTInput,
@ -630,6 +655,7 @@ plugins += [
    TXTOutput,
    TXTZOutput,
    HTMLOutput,
    HTMLZOutput,
    SNBOutput,
 ]
 # Order here matters. The first matched device is the one used.
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@ -470,8 +470,8 @@ class KoboReaderOutput(OutputProfile):
    description = _('This profile is intended for the Kobo Reader.')
-    screen_size               = (540, 718)
+    screen_size               = (536, 710)
-    comic_screen_size         = (540, 718)
+    comic_screen_size         = (536, 710)
    dpi                       = 168.451
    fbase                     = 12
    fsizes                    = [7.5, 9, 10, 12, 15.5, 20, 22, 24]
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@ -7,7 +7,7 @@ __docformat__ = 'restructuredtext en'
 import cStringIO, ctypes, datetime, os, re, shutil, subprocess, sys, tempfile, time
 from calibre.constants import __appname__, __version__, DEBUG
-from calibre import fit_image
+from calibre import fit_image, confirm_config_name
 from calibre.constants import isosx, iswindows
 from calibre.devices.errors import OpenFeedback, UserFeedback
 from calibre.devices.usbms.deviceconfig import DeviceConfig
@ -18,34 +18,76 @@ from calibre.ebooks.metadata import authors_to_string, MetaInformation, \
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.ebooks.metadata.epub import set_metadata
 from calibre.library.server.utils import strftime
-from calibre.utils.config import config_dir, prefs
+from calibre.utils.config import config_dir, dynamic, prefs
 from calibre.utils.date import now, parse_date
 from calibre.utils.logging import Log
 from calibre.utils.zipfile import ZipFile
 class AppleOpenFeedback(OpenFeedback):
-    def __init__(self):
+    def __init__(self, plugin):
        OpenFeedback.__init__(self, u'')
        self.log = plugin.log
        self.plugin = plugin
    def custom_dialog(self, parent):
-        from PyQt4.Qt import (QDialog, QVBoxLayout, QLabel, QDialogButtonBox)
+        from PyQt4.Qt import (QDialog, QDialogButtonBox, QIcon,
                              QLabel, QPushButton, QVBoxLayout)
        class Dialog(QDialog):
-            def __init__(self, p):
+            def __init__(self, p, cd, pixmap='dialog_information.png'):
                QDialog.__init__(self, p)
                self.cd = cd
                self.setWindowTitle("Apple iDevice detected")
                self.l = l = QVBoxLayout()
                self.setLayout(l)
-                l.addWidget(QLabel('test'))
+                msg = QLabel()
-                self.bb = QDialogButtonBox(QDialogButtonBox.OK)
+                msg.setText(_(
                            '<p>If you do not want calibre to recognize your Apple iDevice '
                            'when it is connected to your computer, '
                            'click <b>Disable Apple Driver</b>.</p>'
                            '<p>To transfer books to your iDevice, '
                            'click <b>Disable Apple Driver</b>, '
                            "then use the 'Connect to iTunes' method recommended in the "
                            '<a href="http://www.mobileread.com/forums/showthread.php?t=118559">Calibre + iDevices FAQ</a>, '
                            'using the <em>Connect/Share</em>|<em>Connect to iTunes</em> menu item.</p>'
                            '<p>Enabling the Apple driver for direct connection to iDevices '
                            'is an unsupported advanced user mode.</p>'
                            '<p></p>'
                            ))
                msg.setOpenExternalLinks(True)
                msg.setWordWrap(True)
                l.addWidget(msg)
                self.bb = QDialogButtonBox()
                disable_driver = QPushButton(_("Disable Apple driver"))
                disable_driver.setDefault(True)
                self.bb.addButton(disable_driver, QDialogButtonBox.RejectRole)
                enable_driver = QPushButton(_("Enable Apple driver"))
                self.bb.addButton(enable_driver, QDialogButtonBox.AcceptRole)
                l.addWidget(self.bb)
                self.bb.accepted.connect(self.accept)
                self.bb.rejected.connect(self.reject)
-        return Dialog(parent)
+                self.setWindowIcon(QIcon(I(pixmap)))
                self.resize(self.sizeHint())
                self.finished.connect(self.do_it)
            def do_it(self, return_code):
                if return_code == self.Accepted:
                    self.cd.log.info(" Apple driver ENABLED")
                    dynamic[confirm_config_name(self.cd.plugin.DISPLAY_DISABLE_DIALOG)] = False
                else:
                    from calibre.customize.ui import disable_plugin
                    self.cd.log.info(" Apple driver DISABLED")
                    disable_plugin(self.cd.plugin)
        return Dialog(parent, self)
 from PIL import Image as PILImage
 from lxml import etree
@ -77,15 +119,11 @@ class DriverBase(DeviceConfig, DevicePlugin):
                    'iBooks Category'),
            _('Cache covers from iTunes/iBooks') +
                ':::' +
-                _('Enable to cache and display covers from iTunes/iBooks'),
+                _('Enable to cache and display covers from iTunes/iBooks')
            _("Skip 'Connect to iTunes' recommendation") +
                ':::' +
                _("Enable to skip the 'Connect to iTunes' recommendation dialog")
    ]
    EXTRA_CUSTOMIZATION_DEFAULT = [
                True,
                True,
                False,
    ]
@ -141,12 +179,13 @@ class ITUNES(DriverBase):
    supported_platforms = ['osx','windows']
    author = 'GRiker'
    #: The version of this plugin as a 3-tuple (major, minor, revision)
-    version        = (0,9,0)
+    version        = (1,0,0)
    DISPLAY_DISABLE_DIALOG = "display_disable_apple_driver_dialog"
    # EXTRA_CUSTOMIZATION_MESSAGE indexes
    USE_SERIES_AS_CATEGORY = 0
    CACHE_COVERS = 1
    SKIP_CONNECT_TO_ITUNES_DIALOG = 2
    OPEN_FEEDBACK_MESSAGE = _(
        'Apple device detected, launching iTunes, please wait ...')
@ -762,13 +801,15 @@ class ITUNES(DriverBase):
        Note that most of the initialization is necessarily performed in can_handle(), as
        we need to talk to iTunes to discover if there's a connected iPod
        '''
        if DEBUG:
            self.log.info("ITUNES.open()")
-        # Display a dialog recommending using 'Connect to iTunes'
+        # Display a dialog recommending using 'Connect to iTunes' if user hasn't
-        if False and not self.settings().extra_customization[self.SKIP_CONNECT_TO_ITUNES_DIALOG]:
+        # previously disabled the dialog
-            raise AppleOpenFeedback()
+        if dynamic.get(confirm_config_name(self.DISPLAY_DISABLE_DIALOG),True):
-
+            raise AppleOpenFeedback(self)
        else:
            if DEBUG:
                self.log.info(" advanced user mode, directly connecting to iDevice")
--- a/src/calibre/devices/bambook/libbambookcore.py
+++ b/src/calibre/devices/bambook/libbambookcore.py
@ -10,7 +10,7 @@ Sanda library wrapper
 import ctypes, uuid, hashlib, os, sys
 from threading import Event, Lock
-from calibre.constants import iswindows, islinux, isosx
+from calibre.constants import iswindows
 from calibre import load_library
 try:
@ -29,12 +29,9 @@ try:
 except:
    lib_handle = None
 text_encoding = 'utf-8'
 if iswindows:
    text_encoding = 'mbcs'
 elif islinux:
    text_encoding = 'utf-8'
 elif isosx:
    text_encoding = 'utf-8'
 def is_bambook_lib_ready():
    return lib_handle != None
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@ -244,7 +244,8 @@ class POCKETBOOK602(USBMS):
    BCD         = [0x0324]
    VENDOR_NAME = ''
-    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902', 'PB903']
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902',
            'PB903', 'PB']
 class POCKETBOOK701(USBMS):
--- a/src/calibre/devices/errors.py
+++ b/src/calibre/devices/errors.py
@ -43,7 +43,7 @@ class OpenFeedback(DeviceError):
    def custom_dialog(self, parent):
        '''
-        If you need to show the user a custom dialog, instead if just
+        If you need to show the user a custom dialog, instead of just
        displaying the feedback_msg, create and return it here.
        '''
        raise NotImplementedError
--- a/src/calibre/ebooks/chardet/init.py
+++ b/src/calibre/ebooks/chardet/init.py
@ -100,6 +100,12 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
        try:
            if encoding.lower().strip() == 'macintosh':
                encoding = 'mac-roman'
            if encoding.lower().replace('_', '-').strip() in (
                    'gb2312', 'chinese', 'csiso58gb231280', 'euc-cn', 'euccn',
                    'eucgb2312-cn', 'gb2312-1980', 'gb2312-80', 'iso-ir-58'):
                # Microsoft Word exports to HTML with encoding incorrectly set to
                # gb2312 instead of gbk. gbk is a superset of gb2312, anyway.
                encoding = 'gbk'
            raw = raw.decode(encoding, 'replace')
        except LookupError:
            encoding = 'utf-8'
@ -110,4 +116,6 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
    if resolve_entities:
        raw = substitute_entites(raw)
    return raw, encoding
--- a/src/calibre/ebooks/conversion/plumber.py
+++ b/src/calibre/ebooks/conversion/plumber.py
@ -1003,8 +1003,10 @@ OptionRecommendation(name='sr3_replace',
        self.opts.insert_blank_line = oibl
        self.opts.remove_paragraph_spacing = orps
-        from calibre.ebooks.oeb.transforms.page_margin import RemoveFakeMargins
+        from calibre.ebooks.oeb.transforms.page_margin import \
            RemoveFakeMargins, RemoveAdobeMargins
        RemoveFakeMargins()(self.oeb, self.log, self.opts)
        RemoveAdobeMargins()(self.oeb, self.log, self.opts)
        pr(0.9)
        self.flush()
--- a/src/calibre/ebooks/htmlz/init.py
+++ b/src/calibre/ebooks/htmlz/init.py
--- a/src/calibre/ebooks/htmlz/input.py
+++ b/src/calibre/ebooks/htmlz/input.py
@ -0,0 +1,66 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os
 from calibre import walk
 from calibre.customize.conversion import InputFormatPlugin
 from calibre.utils.zipfile import ZipFile
 class HTMLZInput(InputFormatPlugin):
    '''
    Input plugin for HTMLZ archives. The archive is unpacked into the current
    directory, the first HTML file found is run through the HTML input plugin
    and the metadata read from the archive is applied to the resulting book.
    '''

    # NOTE(review): 'HTLZ' looks like a typo for 'HTMLZ' and the description
    # probably means HTMLZ files; both are left untouched here because the
    # strings may be relied on elsewhere - confirm before changing.
    name        = 'HTLZ Input'
    author      = 'John Schember'
    description = 'Convert HTML files to HTML'
    file_types  = set(['htmlz'])

    def convert(self, stream, options, file_ext, log,
                accelerators):
        '''
        Convert the HTMLZ archive in ``stream`` and return the book produced
        by the HTML input plugin.

        @stream: File object of the HTMLZ archive.
        @options: Conversion options; the HTML plugin's recommended values
                  are written onto it and input_encoding is forced to utf-8.
        @file_ext: Extension passed on to get_file_type_metadata.
        @log: Logger handed to the HTML plugin and the metadata transfer.
        @accelerators: Not used here.
        '''
        self.log = log
        raw = b''

        # Extract content from zip archive.
        zf = ZipFile(stream)
        zf.extractall('.')

        # Only the first HTML file found is used.
        for x in walk('.'):
            if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
                with open(x, 'rb') as tf:
                    raw = tf.read()
                break

        # The file content is bytes. Decode it explicitly (user supplied
        # encoding first, detection otherwise) instead of calling encode()
        # on the raw bytes, which breaks on any non-ASCII character.
        html = u''
        if raw:
            ienc = getattr(options, 'input_encoding', None)
            if not ienc:
                from calibre.ebooks.chardet import xml_to_unicode
                ienc = xml_to_unicode(raw[:4096])[-1]
            html = raw.decode(ienc or 'utf-8', 'replace')

        # Run the HTML through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
        for opt in html_input.options:
            setattr(options, opt.option.name, opt.recommended_value)
        # The temporary file written below is always utf-8.
        options.input_encoding = 'utf-8'

        # Pick a file name that does not clash with anything extracted from
        # the archive; every candidate lives in the same base directory.
        base = os.getcwdu()
        fname = os.path.join(base, 'index.html')
        c = 0
        while os.path.exists(fname):
            c += 1
            fname = os.path.join(base, 'index%d.html' % c)
        with open(fname, 'wb') as htmlfile:
            htmlfile.write(html.encode('utf-8'))

        # debug_pipeline is switched off for the nested conversion and put
        # back afterwards, even when that conversion fails.
        odi = options.debug_pipeline
        options.debug_pipeline = None
        try:
            # Generate oeb from html conversion.
            with open(fname, 'rb') as f:
                oeb = html_input.convert(f, options, 'html', log, {})
        finally:
            options.debug_pipeline = odi
        os.remove(fname)

        # Set metadata from file.
        from calibre.customize.ui import get_file_type_metadata
        from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata
        mi = get_file_type_metadata(stream, file_ext)
        meta_info_to_oeb_metadata(mi, oeb.metadata, log)

        return oeb
--- a/src/calibre/ebooks/htmlz/oeb2html.py
+++ b/src/calibre/ebooks/htmlz/oeb2html.py
@ -0,0 +1,371 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 '''
 Transform OEB content into a single (more or less) HTML file.
 '''
 import os
 from urlparse import urlparse
 from calibre import prepare_string_for_xml
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.utils.logging import default_log
 class OEB2HTML(object):
    '''
    Base class. All subclasses should implement dump_text to actually transform
    content. Also, callers should use oeb2html to get the transformed html.
    links and images can be retrieved after calling oeb2html to get the mapping
    of OEB links and images to the new names used in the html returned by oeb2html.
    Images will always be referenced as if they are in an images directory.

    Use get_css to get the CSS classes for the OEB document as a string.
    '''

    def __init__(self, log=None):
        '''
        @log: Logger to report to; default_log is used when none is given.
        '''
        self.log = default_log if log is None else log
        # 'href#id' in the source book -> anchor name used in the output.
        self.links = {}
        # Absolute image href in the source book -> file name under images/.
        self.images = {}

    def oeb2html(self, oeb_book, opts):
        '''
        Convert oeb_book to one HTML string. The links and images mappings
        are reset first, so they describe only this conversion.
        '''
        self.log.info('Converting OEB book to HTML...')
        self.opts = opts
        self.links = {}
        self.images = {}

        return self.mlize_spine(oeb_book)

    def mlize_spine(self, oeb_book):
        '''
        Run dump_text over the body of every spine item and join the results
        into a single document.
        '''
        # The head has to be closed before the body is opened.
        output = [u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /></head><body>']
        for item in oeb_book.spine:
            self.log.debug('Converting %s to HTML...' % item.href)
            stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
            output.append('\n\n')
        output.append('</body></html>')
        return ''.join(output)

    def dump_text(self, elem, stylizer, page):
        '''
        Return the list of HTML fragments for elem. Must be provided by
        subclasses.
        '''
        raise NotImplementedError

    def get_link_id(self, href, aid):
        '''
        Return the output anchor name for the anchor aid in the file href,
        allocating a new 'calibre_link-N' name the first time it is seen.
        '''
        aid = '%s#%s' % (href, aid)
        if aid not in self.links:
            self.links[aid] = 'calibre_link-%s' % len(self.links)
        return self.links[aid]

    def rewrite_link(self, tag, attribs, page):
        '''
        Rewrite the id attribute and, for <a> tags, relative href values so
        they use the anchor names handed out by get_link_id. attribs is
        changed in place and returned.
        '''
        # Rewrite ids.
        if 'id' in attribs:
            attribs['id'] = self.get_link_id(page.href, attribs['id'])
        # Rewrite links.
        if tag == 'a' and 'href' in attribs:
            href = page.abshref(attribs['href'])
            if self.url_is_relative(href):
                # partition leaves the fragment empty when there is no '#'.
                href, _sep, frag = href.partition('#')
                attribs['href'] = '#%s' % self.get_link_id(href, frag)
        return attribs

    def rewrite_image(self, tag, attribs, page):
        '''
        Point the src of <img> tags at images/<name>, where <name> is a
        zero padded running number that keeps the original extension.
        attribs is changed in place and returned.
        '''
        if tag == 'img':
            src = attribs.get('src', None)
            if src:
                src = page.abshref(src)
                if src not in self.images:
                    ext = os.path.splitext(src)[1]
                    fname = '%s%s' % (len(self.images), ext)
                    self.images[src] = fname.zfill(10)
                attribs['src'] = 'images/%s' % self.images[src]
        return attribs

    def url_is_relative(self, url):
        '''
        Return True when url carries no scheme.
        '''
        return not urlparse(url).scheme

    def get_css(self, oeb_book):
        '''
        Return the text of the first text/css item in the manifest, or an
        empty string when there is none.
        '''
        for item in oeb_book.manifest:
            if item.media_type == 'text/css':
                return item.data.cssText
        return u''
 class OEB2HTMLNoCSSizer(OEB2HTML):
    '''
    This will remap a small number of CSS styles to equivalent HTML tags.
    '''

    def dump_text(self, elem, stylizer, page):
        '''
        Turn elem and everything below it into a list of HTML fragments.

        @elem: The element in the etree that we are working on.
        @stylizer: The style information attached to the element.
        @page: The spine item elem belongs to; used to rewrite links and images.

        Joining the returned list gives the markup for elem followed by the
        text that trails it.
        '''
        # Text taken from the tree is written into markup again, so it gets
        # the same escaping the attribute values below already receive.
        tail = getattr(elem, 'tail', None)
        tail = prepare_string_for_xml(tail) if tail else ''

        # We can only process tags. If there isn't a tag return any text.
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            p = elem.getparent()
            if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS:
                return [tail]
            return ['']

        # Setup our variables.
        text = ['']
        style = stylizer.style(elem)
        tags = []
        tag = barename(elem.tag)
        attribs = elem.attrib

        attribs = self.rewrite_link(tag, attribs, page)
        attribs = self.rewrite_image(tag, attribs, page)

        if tag == 'body':
            tag = 'div'
            attribs['id'] = self.get_link_id(page.href, '')
        tags.append(tag)

        # Ignore anything that is set to not be displayed. The text trailing
        # the element is not part of it, so that text is still kept.
        if style['display'] in ('none', 'oeb-page-head', 'oeb-page-foot') \
           or style['visibility'] == 'hidden':
            return [tail]

        # Remove attributes we won't want.
        for unwanted in ('class', 'style'):
            if unwanted in attribs:
                del attribs[unwanted]

        # Turn the rest of the attributes into a string we can write with the tag.
        at = ''.join(' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
                     for k, v in attribs.items())

        # Write the tag.
        text.append('<%s%s>' % (tag, at))

        # Turn styles into tags.
        if style['font-weight'] in ('bold', 'bolder'):
            text.append('<b>')
            tags.append('b')
        if style['font-style'] == 'italic':
            text.append('<i>')
            tags.append('i')
        if style['text-decoration'] == 'underline':
            text.append('<u>')
            tags.append('u')
        if style['text-decoration'] == 'line-through':
            text.append('<s>')
            tags.append('s')

        # Process tags that contain text.
        if hasattr(elem, 'text') and elem.text:
            text.append(prepare_string_for_xml(elem.text))

        # Recurse down into tags within the tag we are in.
        for item in elem:
            text += self.dump_text(item, stylizer, page)

        # Close all open tags, innermost first.
        for t in reversed(tags):
            text.append('</%s>' % t)

        # Add the text that is outside of the tag.
        if tail:
            text.append(tail)

        return text
 class OEB2HTMLInlineCSSizer(OEB2HTML):
    '''
    Turns external CSS classes into inline style attributes.
    '''

    def dump_text(self, elem, stylizer, page):
        '''
        Turn elem and everything below it into a list of HTML fragments,
        writing the element's style as an inline style attribute.

        @elem: The element in the etree that we are working on.
        @stylizer: The style information attached to the element.
        @page: The spine item elem belongs to; used to rewrite links and images.

        Joining the returned list gives the markup for elem followed by the
        text that trails it.
        '''
        # Text taken from the tree is written into markup again, so it gets
        # the same escaping the attribute values below already receive.
        tail = getattr(elem, 'tail', None)
        tail = prepare_string_for_xml(tail) if tail else ''

        # We can only process tags. If there isn't a tag return any text.
        if not isinstance(elem.tag, basestring) \
           or namespace(elem.tag) != XHTML_NS:
            p = elem.getparent()
            if p is not None and isinstance(p.tag, basestring) and namespace(p.tag) == XHTML_NS:
                return [tail]
            return ['']

        # Setup our variables.
        text = ['']
        style = stylizer.style(elem)
        tags = []
        tag = barename(elem.tag)
        attribs = elem.attrib

        attribs = self.rewrite_link(tag, attribs, page)
        attribs = self.rewrite_image(tag, attribs, page)

        style_a = '%s' % style
        if tag == 'body':
            tag = 'div'
            attribs['id'] = self.get_link_id(page.href, '')
            if style['page-break-before'] != 'always':
                # Put the page break in front of the element's own style.
                # The separator is parenthesised: without that the
                # conditional expression swallows the whole concatenation
                # and one of the two parts is lost.
                style_a = 'page-break-before: always;' + (' ' if style_a else '') + style_a
        tags.append(tag)

        # Remove attributes we won't want.
        for unwanted in ('class', 'style'):
            if unwanted in attribs:
                del attribs[unwanted]

        # Turn the rest of the attributes into a string we can write with the tag.
        at = ''.join(' %s="%s"' % (k, prepare_string_for_xml(v, attribute=True))
                     for k, v in attribs.items())

        # Turn style into strings for putting in the tag. It is escaped like
        # every other attribute value so a quote inside it cannot end the
        # attribute early.
        style_t = ''
        if style_a:
            style_t = ' style="%s"' % prepare_string_for_xml(style_a, attribute=True)

        # Write the tag.
        text.append('<%s%s%s>' % (tag, at, style_t))

        # Process tags that contain text.
        if hasattr(elem, 'text') and elem.text:
            text.append(prepare_string_for_xml(elem.text))

        # Recurse down into tags within the tag we are in.
        for item in elem:
            text += self.dump_text(item, stylizer, page)

        # Close all open tags, innermost first.
        for t in reversed(tags):
            text.append('</%s>' % t)

        # Add the text that is outside of the tag.
        if tail:
            text.append(tail)

        return text
 class OEB2HTMLClassCSSizer(OEB2HTML):
    '''
    Keeps the class attributes of the source markup. Depending on the
    htmlz_class_style option the CSS is either embedded in a style tag in
    the head or referenced as an external file called style.css.
    '''
    def mlize_spine(self, oeb_book):
        '''
        Convert every spine item of oeb_book and return the result wrapped
        in a complete HTML document, as a single string.
        '''
        body_parts = []
        for spine_item in oeb_book.spine:
            self.log.debug('Converting %s to HTML...' % spine_item.href)
            stylizer = Stylizer(spine_item.data, spine_item.href, oeb_book, self.opts)
            body_elem = spine_item.data.find(XHTML('body'))
            body_parts.extend(self.dump_text(body_elem, stylizer, spine_item))
            body_parts.append('\n\n')
        if self.opts.htmlz_class_style == 'external':
            css = u'<link href="style.css" rel="stylesheet" type="text/css" />'
        else:
            css = u'<style type="text/css">' + self.get_css(oeb_book) + u'</style>'
        document = [
            u'<html><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" />',
            css,
            u'</head><body>',
        ]
        document.extend(body_parts)
        document.append(u'</body></html>')
        return ''.join(document)
    def dump_text(self, elem, stylizer, page):
        '''
        Serialize elem and, recursively, its children into a list of
        strings. The style attribute is dropped, everything else (class
        included) is written out.

        @elem: The element in the etree that we are working on.
        @stylizer: The style information attached to the element. It is
                   only passed on to the recursive calls.
        @page: The item being converted.
        '''
        # Only XHTML tags can be processed. For anything else hand back the
        # tail text, and only when the parent is an XHTML tag.
        is_xhtml_tag = isinstance(elem.tag, basestring) \
            and namespace(elem.tag) == XHTML_NS
        if not is_xhtml_tag:
            parent = elem.getparent()
            if parent is not None and isinstance(parent.tag, basestring) \
                    and namespace(parent.tag) == XHTML_NS and elem.tail:
                return [elem.tail]
            return ['']
        tag = barename(elem.tag)
        attribs = elem.attrib
        attribs = self.rewrite_link(tag, attribs, page)
        attribs = self.rewrite_image(tag, attribs, page)
        if tag == 'body':
            # Every body becomes a div carrying the link id of its page.
            tag = 'div'
            attribs['id'] = self.get_link_id(page.href, '')
        # Inline styles are not wanted, the classes carry the styling.
        if 'style' in attribs:
            del attribs['style']
        # Render the remaining attributes for the opening tag.
        attr_text = ''.join([
            ' %s="%s"' % (key, prepare_string_for_xml(val, attribute=True))
            for key, val in attribs.items()])
        pieces = ['', '<%s%s>' % (tag, attr_text)]
        # Text directly inside the tag.
        if hasattr(elem, 'text') and elem.text:
            pieces.append(elem.text)
        # Children of the tag.
        for child in elem:
            pieces.extend(self.dump_text(child, stylizer, page))
        # Only one tag is ever opened here, so only one has to be closed.
        pieces.append('</%s>' % tag)
        # Text that follows the tag.
        if hasattr(elem, 'tail') and elem.tail:
            pieces.append(elem.tail)
        return pieces
 def oeb2html_no_css(oeb_book, log, opts):
    '''
    Convert oeb_book with OEB2HTMLNoCSSizer. Returns the tuple
    (html, images), where images is the images attribute of the converter
    read after the conversion.
    '''
    converter = OEB2HTMLNoCSSizer(log)
    markup = converter.oeb2html(oeb_book, opts)
    return (markup, converter.images)
 def oeb2html_inline_css(oeb_book, log, opts):
    '''
    Convert oeb_book with OEB2HTMLInlineCSSizer. Returns the tuple
    (html, images), where images is the images attribute of the converter
    read after the conversion.
    '''
    converter = OEB2HTMLInlineCSSizer(log)
    markup = converter.oeb2html(oeb_book, opts)
    return (markup, converter.images)
 def oeb2html_class_css(oeb_book, log, opts):
    '''
    Convert oeb_book with OEB2HTMLClassCSSizer, forcing the CSS to be
    embedded in the document. Returns the tuple (html, images).

    Note that opts is modified: the class style options are set on it.
    '''
    izer = OEB2HTMLClassCSSizer(log)
    # OEB2HTMLClassCSSizer.mlize_spine reads opts.htmlz_class_style, so that
    # is the attribute that has to be forced to 'inline'. Only the HTML is
    # returned here, an external style.css would be lost.
    setattr(opts, 'htmlz_class_style', 'inline')
    # Previously only this name was set; keep setting it in case anything
    # else looks at it.
    setattr(opts, 'class_style', 'inline')
    html = izer.oeb2html(oeb_book, opts)
    images = izer.images
    return (html, images)
--- a/src/calibre/ebooks/htmlz/output.py
+++ b/src/calibre/ebooks/htmlz/output.py
@ -0,0 +1,83 @@
 # -*- coding: utf-8 -*-
 from __future__ import (unicode_literals, division, absolute_import, print_function)
 __license__ = 'GPL 3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 import os
 from lxml import etree
 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
 from calibre.ebooks.oeb.base import OEB_IMAGES
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile
 class HTMLZOutput(OutputFormatPlugin):
    '''
    Output plugin that writes a book as an HTMLZ file: a zip archive
    containing index.html, metadata.opf and, when needed, style.css and an
    images directory.
    '''
    name = 'HTMLZ Output'
    author = 'John Schember'
    file_type = 'htmlz'
    options = set([
        OptionRecommendation(name='htmlz_css_type', recommended_value='class',
            level=OptionRecommendation.LOW,
            choices=['class', 'inline', 'tag'],
            help=_('Specify the handling of CSS. Default is class.\n'
                   'class: Use CSS classes and have elements reference them.\n'
                   'inline: Write the CSS as an inline style attribute.\n'
                   'tag: Turn as many CSS styles as possible into HTML tags.'
            )),
        OptionRecommendation(name='htmlz_class_style', recommended_value='external',
            level=OptionRecommendation.LOW,
            choices=['external', 'inline'],
            help=_('How to handle the CSS when using css-type = \'class\'.\n'
                   'Default is external.\n'
                   'external: Use an external CSS file that is linked in the document.\n'
                   'inline: Place the CSS in the head section of the document.'
            )),
    ])
    def convert(self, oeb_book, output_path, input_plugin, opts, log):
        '''
        Convert oeb_book to HTML with the converter selected by
        opts.htmlz_css_type, collect the HTML, CSS, images and metadata in a
        temporary directory and zip that directory to output_path.

        input_plugin is not used.
        '''
        # HTML
        if opts.htmlz_css_type == 'inline':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLInlineCSSizer
            OEB2HTMLizer = OEB2HTMLInlineCSSizer
        elif opts.htmlz_css_type == 'tag':
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLNoCSSizer
            OEB2HTMLizer = OEB2HTMLNoCSSizer
        else:
            from calibre.ebooks.htmlz.oeb2html import OEB2HTMLClassCSSizer as OEB2HTMLizer
        # The files below are opened in binary mode, so text is encoded
        # explicitly instead of relying on an implicit conversion, which
        # fails for non-ASCII content. Data that is already a byte string is
        # written unchanged.
        text_type = type(u'')
        with TemporaryDirectory('_htmlz_output') as tdir:
            htmlizer = OEB2HTMLizer(log)
            html = htmlizer.oeb2html(oeb_book, opts)
            if isinstance(html, text_type):
                html = html.encode('utf-8')
            with open(os.path.join(tdir, 'index.html'), 'wb') as tf:
                tf.write(html)
            # CSS
            if opts.htmlz_css_type == 'class' and opts.htmlz_class_style == 'external':
                css = htmlizer.get_css(oeb_book)
                if isinstance(css, text_type):
                    css = css.encode('utf-8')
                with open(os.path.join(tdir, 'style.css'), 'wb') as tf:
                    tf.write(css)
            # Images
            images = htmlizer.images
            if images:
                if not os.path.exists(os.path.join(tdir, 'images')):
                    os.makedirs(os.path.join(tdir, 'images'))
                for item in oeb_book.manifest:
                    # images maps the href of a referenced image to the file
                    # name it was given in the generated HTML.
                    if item.media_type in OEB_IMAGES and item.href in images:
                        fname = os.path.join(tdir, 'images', images[item.href])
                        with open(fname, 'wb') as img:
                            img.write(item.data)
            # Metadata
            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
                mdataf.write(etree.tostring(oeb_book.metadata.to_opf1()))
            # Close the archive explicitly, and before the temporary
            # directory goes away, so that it is completely written even if
            # adding the directory fails part way.
            htmlz = ZipFile(output_path, 'w')
            try:
                htmlz.add_dir(tdir)
            finally:
                htmlz.close()
--- a/src/calibre/ebooks/lrf/input.py
+++ b/src/calibre/ebooks/lrf/input.py
@ -6,7 +6,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
-import os, textwrap
+import os, textwrap, sys
 from copy import deepcopy
 from lxml import etree
@ -413,7 +413,12 @@ class LRFInput(InputFormatPlugin):
                ('calibre', 'image-block'): image_block,
                }
        transform = etree.XSLT(styledoc, extensions=extensions)
        try:
            result = transform(doc)
        except RuntimeError:
            sys.setrecursionlimit(5000)
            result = transform(doc)
        with open('content.opf', 'wb') as f:
            f.write(result)
        styles.write()
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@ -125,7 +125,10 @@ class Metadata(object):
        _data = object.__getattribute__(self, '_data')
        if field in TOP_LEVEL_IDENTIFIERS:
            field, val = self._clean_identifier(field, val)
-            _data['identifiers'].update({field: val})
+            identifiers = _data['identifiers']
            identifiers.pop(field, None)
            if val:
                identifiers[field] = val
        elif field == 'identifiers':
            if not val:
                val = copy.copy(NULL_VALUES.get('identifiers', None))
@ -198,7 +201,9 @@ class Metadata(object):
        return copy.deepcopy(ans)
    def _clean_identifier(self, typ, val):
        if typ:
            typ = icu_lower(typ).strip().replace(':', '').replace(',', '')
        if val:
            val = val.strip().replace(',', '|').replace(':', '|')
        return typ, val
@ -222,8 +227,7 @@ class Metadata(object):
        identifiers = object.__getattribute__(self,
            '_data')['identifiers']
-        if not val and typ in identifiers:
+        identifiers.pop(typ, None)
            identifiers.pop(typ)
        if val:
            identifiers[typ] = val
@ -645,7 +649,7 @@ class Metadata(object):
            fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
        if self.series:
            fmt('Series', self.series + ' #%s'%self.format_series_index())
-        if self.language:
+        if not self.is_null('language'):
            fmt('Language', self.language)
        if self.rating is not None:
            fmt('Rating', self.rating)
--- a/src/calibre/ebooks/metadata/extz.py
+++ b/src/calibre/ebooks/metadata/extz.py
@ -4,7 +4,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, John Schember <john@nachtimwald.com>'
 '''
-Read meta information from TXT files
+Read meta information from extZ (TXTZ, HTMLZ...) files.
 '''
 import os
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@ -193,6 +193,7 @@ class ResultList(list):
 def search(title=None, author=None, publisher=None, isbn=None,
           min_viewability='none', verbose=False, max_results=40):
    br   = browser()
    br.set_handle_gzip(True)
    start, entries = 1, []
    while start > 0 and len(entries) <= max_results:
        new, start = Query(title=title, author=author, publisher=publisher,
--- a/src/calibre/ebooks/metadata/meta.py
+++ b/src/calibre/ebooks/metadata/meta.py
@ -182,6 +182,19 @@ def metadata_from_filename(name, pat=None):
            mi.isbn = si
        except (IndexError, ValueError):
            pass
        try:
            publisher = match.group('publisher')
            mi.publisher = publisher
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_date
                mi.pubdate = parse_date(pubdate)
        except:
            pass
    if mi.is_null('title'):
        mi.title = name
    return mi
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@ -23,7 +23,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.library.comments import sanitize_comments_html
 from calibre.utils.date import parse_date
-class Worker(Thread): # {{{
+class Worker(Thread): # Get details {{{
    '''
    Get book details from amazons book page in a separate thread
@ -218,6 +218,9 @@ class Worker(Thread): # {{{
                    ' @class="emptyClear" or @href]'):
                c.getparent().remove(c)
            desc = tostring(desc, method='html', encoding=unicode).strip()
            # Encoding bug in Amazon data U+fffd (replacement char)
            # in some examples it is present in place of '
            desc = desc.replace('\ufffd', "'")
            # remove all attributes from tags
            desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
            # Collapse whitespace
@ -276,12 +279,14 @@ class Worker(Thread): # {{{
 class Amazon(Source):
-    name = 'Amazon'
+    name = 'Amazon Metadata'
    description = _('Downloads metadata from Amazon')
    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
        'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
    has_html_comments = True
    supports_gzip_transfer_encoding = True
    AMAZON_DOMAINS = {
            'com': _('US'),
@ -408,6 +413,18 @@ class Amazon(Source):
                    if 'bulk pack' not in title:
                        matches.append(a.get('href'))
                    break
            if not matches:
                # This can happen for some user agents that Amazon thinks are
                # mobile/less capable
                log('Trying alternate results page markup')
                for td in root.xpath(
                    r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
                    for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
                        title = tostring(a, method='text', encoding=unicode).lower()
                        if 'bulk pack' not in title:
                            matches.append(a.get('href'))
                        break
        # Keep only the top 5 matches as the matches are sorted by relevance by
        # Amazon so lower matches are not likely to be very relevant
@ -476,14 +493,14 @@ class Amazon(Source):
        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
-            result_queue.put(cdata)
+            result_queue.put((self, cdata))
        except:
            log.exception('Failed to download cover from:', cached_url)
    # }}}
 if __name__ == '__main__': # tests {{{
    # To run these test use: calibre-debug -e
    # src/calibre/ebooks/metadata/sources/amazon.py
@ -504,7 +521,7 @@ if __name__ == '__main__': # tests {{{
            (  # This isbn not on amazon
                {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
                    'authors':['Lutz']},
-                [title_test('Learning Python: Powerful Object-Oriented Programming',
+                [title_test('Learning Python, 3rd Edition',
                    exact=True), authors_test(['Mark Lutz'])
                 ]
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@ -15,8 +15,20 @@ from calibre.customize import Plugin
 from calibre.utils.logging import ThreadSafeLog, FileStream
 from calibre.utils.config import JSONConfig
 from calibre.utils.titlecase import titlecase
 from calibre.utils.icu import capitalize, lower
 from calibre.ebooks.metadata import check_isbn
-msprefs = JSONConfig('metadata_sources.json')
+msprefs = JSONConfig('metadata_sources/global.json')
 msprefs.defaults['txt_comments'] = False
 msprefs.defaults['ignore_fields'] = []
 msprefs.defaults['max_tags'] = 20
 msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
 msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds
 # Google covers are often poor quality (scans/errors) but they have high
 # resolution, so they trump covers from better sources. So make sure they
 # are only used if no other covers are found.
 msprefs.defaults['cover_priorities'] = {'Google':2}
 def create_log(ostream=None):
    log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
@ -88,6 +100,39 @@ class InternalMetadataCompareKeyGen(object):
 # }}}
 def get_cached_cover_urls(mi):
    '''
    Generator. Asks every metadata plugin with the identify capability for
    a cached cover URL matching mi.identifiers and yields (plugin, url)
    for each plugin that returns one.
    '''
    from calibre.customize.ui import metadata_plugins
    for plugin in list(metadata_plugins(['identify'])):
        cached_url = plugin.get_cached_cover_url(mi.identifiers)
        if cached_url:
            yield (plugin, cached_url)
 def cap_author_token(token):
    '''
    Return the capitalized form of one whitespace separated token of an
    author name. The particles von, de, el, van and le are returned in
    lower case and runs of initials are spaced out.
    '''
    lowered = lower(token)
    if lowered in ('von', 'de', 'el', 'van', 'le'):
        return lowered
    if re.match(r'([a-z]\.){2,}$', lowered) is None:
        return capitalize(token)
    # Normalize tokens of the form J.K. to J. K.
    initials = token.split('.')
    return '. '.join(map(capitalize, initials)).strip()
 def fixauthors(authors):
    '''
    Return a new list in which every author name has been split on
    whitespace, had each token passed through cap_author_token and been
    joined again with single spaces. A false value (None, empty list) is
    returned unchanged.
    '''
    if not authors:
        return authors
    return [' '.join(map(cap_author_token, name.split())) for name in authors]
 def fixcase(x):
    '''
    Return titlecase(x), or x itself when it is false (None, empty string).
    '''
    return titlecase(x) if x else x
 class Source(Plugin):
    type = _('Metadata source')
@ -103,6 +148,15 @@ class Source(Plugin):
    #: during the identify phase
    touched_fields = frozenset()
    #: Set this to True if your plugin return HTML formatted comments
    has_html_comments = False
    #: Setting this to True means that the browser object will add
    #: Accept-Encoding: gzip to all requests. This can speedup downloads
    #: but make sure that the source actually supports gzip transfer encoding
    #: correctly first
    supports_gzip_transfer_encoding = False
    def __init__(self, *args, **kwargs):
        Plugin.__init__(self, *args, **kwargs)
        self._isbn_to_identifier_cache = {}
@ -126,6 +180,8 @@ class Source(Plugin):
    def browser(self):
        if self._browser is None:
            self._browser = browser(user_agent=random_user_agent())
            if self.supports_gzip_transfer_encoding:
                self._browser.set_handle_gzip(True)
        return self._browser.clone_browser()
    # }}}
@ -228,14 +284,11 @@ class Source(Plugin):
        before putting the Metadata object into result_queue. You can of
        course, use a custom algorithm suited to your metadata source.
        '''
        def fixcase(x):
            if x:
                x = titlecase(x)
            return x
        if mi.title:
            mi.title = fixcase(mi.title)
-        mi.authors = list(map(fixcase, mi.authors))
+        mi.authors = fixauthors(mi.authors)
        mi.tags = list(map(fixcase, mi.tags))
        mi.isbn = check_isbn(mi.isbn)
    # }}}
@ -314,7 +367,8 @@ class Source(Plugin):
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download a cover and put it into result_queue. The parameters all have
-        the same meaning as for :meth:`identify`.
+        the same meaning as for :meth:`identify`. Put (self, cover_data) into
        result_queue.
        This method should use cached cover URLs for efficiency whenever
        possible. When cached data is not present, most plugins simply call
--- a/src/calibre/ebooks/metadata/sources/cli.py
+++ b/src/calibre/ebooks/metadata/sources/cli.py
@ -0,0 +1,98 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import sys, textwrap
 from io import BytesIO
 from threading import Event
 from calibre import prints
 from calibre.utils.config import OptionParser
 from calibre.utils.magick.draw import save_cover_data_to
 from calibre.ebooks.metadata import string_to_authors
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.metadata.sources.base import create_log
 from calibre.ebooks.metadata.sources.identify import identify
 from calibre.ebooks.metadata.sources.covers import download_cover
 def option_parser():
    '''
    Build and return the OptionParser for the command line metadata
    download tool.
    '''
    usage = textwrap.dedent(
        '''\
        %prog [options]
        Fetch book metadata from online sources. You must specify at least one
        of title, authors or ISBN.
        '''
    )
    parser = OptionParser(usage)
    # (flags, keyword arguments) for every supported option, in the order
    # in which they are registered.
    option_table = (
        (('-t', '--title'), dict(help='Book title')),
        (('-a', '--authors'), dict(help='Book author(s)')),
        (('-i', '--isbn'), dict(help='Book ISBN')),
        (('-v', '--verbose'), dict(default=False, action='store_true',
            help='Print the log to the console (stderr)')),
        (('-o', '--opf'), dict(help='Output the metadata in OPF format')),
        (('-c', '--cover'), dict(
            help='Specify a filename. The cover, if available, will be saved to it')),
        (('-d', '--timeout'), dict(default='30',
            help='Timeout in seconds. Default is 30')),
    )
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)
    return parser
 def main(args=sys.argv):
    '''
    Command line entry point. Identifies a book from the title, authors
    and/or ISBN given on the command line, optionally downloads its cover,
    and prints the first result either as OPF or as text.

    Returns 0 on success. Raises SystemExit(1) when no results are found.
    '''
    parser = option_parser()
    opts, args = parser.parse_args(args)
    timeout = int(opts.timeout)
    # Everything that is logged ends up in buf and is only shown on request
    # or on failure.
    buf = BytesIO()
    log = create_log(buf)
    abort = Event()
    authors = []
    if opts.authors:
        authors = string_to_authors(opts.authors)
    identifiers = {}
    if opts.isbn:
        identifiers['isbn'] = opts.isbn
    results = identify(log, abort, title=opts.title, authors=authors,
            identifiers=identifiers, timeout=timeout)
    if not results:
        # Print the captured log text. log itself is the logger object,
        # printing it would only show its repr.
        print (buf.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]
    cf = None
    if opts.cover:
        cover = download_cover(log, title=opts.title, authors=authors,
                identifiers=result.identifiers, timeout=timeout)
        if cover is None:
            prints('No cover found', file=sys.stderr)
        else:
            # The image data is the last element of the cover tuple.
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover
    log_text = buf.getvalue()
    result = (metadata_to_opf(result) if opts.opf else
                    unicode(result).encode('utf-8'))
    if opts.verbose:
        print (log_text, file=sys.stderr)
    print (result)
    if not opts.opf and opts.cover:
        prints('Cover               :', cf)
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/metadata/sources/covers.py
+++ b/src/calibre/ebooks/metadata/sources/covers.py
@ -0,0 +1,178 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import time
 from Queue import Queue, Empty
 from threading import Thread, Event
 from io import BytesIO
 from calibre.customize.ui import metadata_plugins
 from calibre.ebooks.metadata.sources.base import msprefs, create_log
 from calibre.utils.magick.draw import Image, save_cover_data_to
 class Worker(Thread):
    '''
    Daemon thread that runs the cover download of a single plugin. What
    the plugin logs is collected in self.buf, and the time the run took is
    stored in self.time_spent (None until run() has finished).
    '''
    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
        Thread.__init__(self)
        self.daemon = True
        self.plugin, self.abort = plugin, abort
        self.buf = BytesIO()
        self.log = create_log(self.buf)
        self.title = title
        self.authors = authors
        self.identifiers = identifiers
        self.timeout = timeout
        self.rq = rq
        self.time_spent = None
    def run(self):
        began = time.time()
        if not self.abort.is_set():
            query = dict(title=self.title, authors=self.authors,
                    identifiers=self.identifiers, timeout=self.timeout)
            try:
                self.plugin.download_cover(self.log, self.rq, self.abort,
                        **query)
            except:
                # A failing plugin must not take the thread down, the
                # failure is only recorded in the log of this worker.
                self.log.exception('Failed to download cover from',
                        self.plugin.name)
        self.time_spent = time.time() - began
 def is_worker_alive(workers):
    '''
    Return True if at least one of the workers is still alive, False
    otherwise (also for an empty sequence).
    '''
    return any(worker.is_alive() for worker in workers)
 def process_result(log, result):
    '''
    Validate and normalize one raw cover result.

    result is a (plugin, data) tuple. The data is loaded as an image,
    trimmed and re-saved through save_cover_data_to. Returns the tuple
    (plugin, width, height, fmt, data), or None when anything goes wrong
    or when the image is less than 50 pixels wide or high.
    '''
    plugin, data = result
    try:
        img = Image()
        img.load(data)
        img.trim(10)
        width, height = img.size
        fmt = img.format
        if min(width, height) < 50:
            raise ValueError('Image too small')
        data = save_cover_data_to(img, '/cover.jpg', return_data=True)
    except:
        log.exception('Invalid cover from', plugin.name)
        return None
    return (plugin, width, height, fmt, data)
 def run_download(log, results, abort,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Run the cover download, putting results into the queue :param:`results`.
    Each result is a tuple of the form:
        (plugin, width, height, fmt, bytes)

    One worker thread is started per plugin with the cover capability. The
    function returns when all workers have finished, when :param:`abort` is
    set, or when more than msprefs['wait_after_first_cover_result'] seconds
    have passed since the first valid cover arrived (in which case abort is
    set here). A per plugin summary is written to :param:`log` at the end.
    '''
    plugins = list(metadata_plugins(['cover']))
    rq = Queue()
    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
            in plugins]
    for w in workers:
        w.start()
    first_result_at = None
    wait_time = msprefs['wait_after_first_cover_result']
    found_results = {}
    def handle_next():
        # Take one raw result off rq (raises Empty if there is none),
        # validate it and, if it is valid, publish and remember it.
        # Returns True if a valid cover was handled.
        result = process_result(log, rq.get_nowait())
        if result is None:
            return False
        results.put(result)
        found_results[result[0]] = result
        return True
    while True:
        time.sleep(0.1)
        try:
            # Remember when the first valid cover arrived. The test has to
            # be "is None": with "is not None" the timestamp is never set
            # and the wait_time limit below can never trigger.
            if handle_next() and first_result_at is None:
                first_result_at = time.time()
        except Empty:
            pass
        if not is_worker_alive(workers):
            break
        if first_result_at is not None and time.time() - first_result_at > wait_time:
            log('Not waiting for any more results')
            abort.set()
        if abort.is_set():
            break
    # Pick up whatever was queued after the last poll.
    while True:
        try:
            handle_next()
        except Empty:
            break
    for w in workers:
        wlog = w.buf.getvalue().strip()
        log('\n'+'*'*30, w.plugin.name, 'Covers', '*'*30)
        log('Request extra headers:', w.plugin.browser.addheaders)
        if w.plugin in found_results:
            result = found_results[w.plugin]
            log('Downloaded cover:', '%dx%d'%(result[1], result[2]))
        else:
            log('Failed to download valid cover')
        if w.time_spent is None:
            # time_spent is only set when the run() of the worker finishes.
            log('Download aborted')
        else:
            log('Took', w.time_spent, 'seconds')
        if wlog:
            log(wlog)
        log('\n'+'*'*80)
 def download_cover(log,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Synchronous cover download: runs run_download to completion and picks
    the "best" of the covers it produced.

    Covers are ordered by the priority configured for their plugin in
    msprefs['cover_priorities'] (1 when nothing is configured, lower sorts
    first) and, for equal priority, by decreasing pixel count.

    Returns the tuple (plugin, width, height, fmt, data), or None if no
    cover is found.
    '''
    queue = Queue()
    run_download(log, queue, Event(), title=title, authors=authors,
            identifiers=identifiers, timeout=timeout)
    found = []
    while True:
        try:
            found.append(queue.get_nowait())
        except Empty:
            break
    priorities = msprefs['cover_priorities']
    def sort_key(entry):
        plugin, width, height, fmt, data = entry
        return (priorities.get(plugin.name, 1), 1/(width*height))
    found.sort(key=sort_key)
    if not found:
        return None
    return found[0]
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@ -145,21 +145,25 @@ def to_metadata(browser, log, entry_, timeout): # {{{
            log.exception('Failed to parse rating')
    # Cover
-    mi.has_google_cover = len(extra.xpath(
+    mi.has_google_cover = None
-        '//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0
+    for x in extra.xpath(
            '//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'):
        mi.has_google_cover = x.get('href')
        break
    return mi
 # }}}
 class GoogleBooks(Source):
-    name = 'Google Books'
+    name = 'Google'
    description = _('Downloads metadata from Google Books')
    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
        'comments', 'publisher', 'identifier:isbn', 'rating',
        'identifier:google']) # language currently disabled
    supports_gzip_transfer_encoding = True
    GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
@ -212,7 +216,7 @@ class GoogleBooks(Source):
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
-                cached_url = self.cover_url_from_identifiers(mi.identifiers)
+                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
@ -222,9 +226,10 @@ class GoogleBooks(Source):
        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
-            result_queue.put(cdata)
+            result_queue.put((self, cdata))
        except:
            log.exception('Failed to download cover from:', cached_url)
@ -270,6 +275,9 @@ class GoogleBooks(Source):
            identifiers={}, timeout=30):
        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
        if not query:
            log.error('Insufficient metadata to construct query')
            return
        br = self.browser
        try:
            raw = br.open_novisit(query, timeout=timeout).read()
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@ -8,16 +8,21 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import time
 from datetime import datetime
 from Queue import Queue, Empty
 from threading import Thread
 from io import BytesIO
 from operator import attrgetter
 from calibre.customize.ui import metadata_plugins
-from calibre.ebooks.metadata.sources.base import create_log
+from calibre.ebooks.metadata.sources.base import create_log, msprefs
-
+from calibre.ebooks.metadata.xisbn import xisbn
-# How long to wait for more results after first result is found
+from calibre.ebooks.metadata.book.base import Metadata
-WAIT_AFTER_FIRST_RESULT = 30 # seconds
+from calibre.utils.date import utc_tz
 from calibre.utils.html2text import html2text
 from calibre.utils.icu import lower
 # Download worker {{{
 class Worker(Thread):
    def __init__(self, plugin, kwargs, abort):
@ -30,10 +35,12 @@ class Worker(Thread):
        self.log = create_log(self.buf)
    def run(self):
        start = time.time()
        try:
            self.plugin.identify(self.log, self.rq, self.abort, **self.kwargs)
        except:
            self.log.exception('Plugin', self.plugin.name, 'failed')
        self.plugin.dl_time_spent = time.time() - start
 def is_worker_alive(workers):
    for w in workers:
@ -41,8 +48,209 @@ def is_worker_alive(workers):
            return True
    return False
-def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
+# }}}
-    plugins = list(metadata_plugins['identify'])
+
 # Merge results from different sources {{{
 class ISBNMerge(object):
    '''
    Collects identify results and merges the ones that describe the same
    book.

    Results that have an ISBN are grouped into pools keyed by a frozenset
    of ISBNs (as returned by xisbn.get_isbn_pool()); results without an
    ISBN are kept in a separate list. finalize() turns whichever of the two
    is usable into the final list of merged results.
    '''
    def __init__(self):
        # frozenset of ISBNs -> (min_year, [results]); min_year is the value
        # returned by xisbn.get_isbn_pool() for that pool
        self.pools = {}
        # Results whose isbn attribute is empty
        self.isbnless_results = []
    def isbn_in_pool(self, isbn):
        ' Return the pool whose ISBN set contains isbn, or None '
        if isbn:
            for isbns, pool in self.pools.iteritems():
                if isbn in isbns:
                    return pool
        return None
    def pool_has_result_from_same_source(self, pool, result):
        ' True iff pool already holds a result with the same identify_plugin '
        results = pool[1]
        for r in results:
            if r.identify_plugin is result.identify_plugin:
                return True
        return False
    def add_result(self, result):
        '''
        File result into the pool for its ISBN, creating the pool if needed.
        Only the first result from each plugin is kept per pool. Results
        without an ISBN go into self.isbnless_results.
        '''
        isbn = result.isbn
        if isbn:
            pool = self.isbn_in_pool(isbn)
            if pool is None:
                isbns, min_year = xisbn.get_isbn_pool(isbn)
                if not isbns:
                    # Nothing came back from xisbn, use a pool containing
                    # just this ISBN
                    isbns = frozenset([isbn])
                self.pools[isbns] = pool = (min_year, [])
            if not self.pool_has_result_from_same_source(pool, result):
                pool[1].append(result)
        else:
            self.isbnless_results.append(result)
    def finalize(self):
        '''
        Merge everything added so far and return the list of merged
        results, sorted by average_source_relevance. Sets
        self.has_isbn_result and self.results as a side effect.

        If any ISBN pool contains a result, only the pools are used and the
        ISBN-less results are ignored. Otherwise the most relevant ISBN-less
        result from each plugin is used.
        '''
        has_isbn_result = False
        # A pool is a (min_year, results) tuple, which is truthy even when
        # its results list is empty, so the list itself has to be tested
        for min_year, results in self.pools.itervalues():
            if results:
                has_isbn_result = True
                break
        self.has_isbn_result = has_isbn_result
        if has_isbn_result:
            self.merge_isbn_results()
        else:
            results = sorted(self.isbnless_results,
                    key=attrgetter('relevance_in_source'))
            # Pick only the most relevant result from each source
            self.results = []
            seen = set()
            for result in results:
                if result.identify_plugin not in seen:
                    seen.add(result.identify_plugin)
                    self.results.append(result)
                    result.average_source_relevance = \
                        result.relevance_in_source
        self.merge_metadata_results()
        return self.results
    def merge_metadata_results(self):
        ' Merge results with identical title and authors '
        groups = {}
        for result in self.results:
            title = lower(result.title if result.title else '')
            key = (title, tuple([lower(x) for x in result.authors]))
            if key not in groups:
                groups[key] = []
            groups[key].append(result)
        # Rebuild the list only if at least two results shared a key
        if len(groups) != len(self.results):
            self.results = []
            for rgroup in groups.itervalues():
                rel = [r.average_source_relevance for r in rgroup]
                if len(rgroup) > 1:
                    # The inputs already carry an averaged relevance, so
                    # merge() must not recompute it from relevance_in_source
                    result = self.merge(rgroup, None, do_asr=False)
                    result.average_source_relevance = sum(rel)/len(rel)
                else:
                    result = rgroup[0]
                self.results.append(result)
        self.results.sort(key=attrgetter('average_source_relevance'))
    def merge_isbn_results(self):
        ' Merge every non-empty ISBN pool into one result each '
        self.results = []
        for min_year, results in self.pools.itervalues():
            if results:
                self.results.append(self.merge(results, min_year))
        self.results.sort(key=attrgetter('average_source_relevance'))
    def length_merge(self, attr, results, null_value=None, shortest=True):
        '''
        Return the shortest (or, with shortest=False, the longest) non-null,
        non-empty value of attr among results, or null_value if there is
        none.
        '''
        values = [getattr(x, attr) for x in results if not x.is_null(attr)]
        values = [x for x in values if len(x) > 0]
        if not values:
            return null_value
        values.sort(key=len, reverse=not shortest)
        return values[0]
    def random_merge(self, attr, results, null_value=None):
        ' Return the first non-null value of attr in results, or null_value '
        values = [getattr(x, attr) for x in results if not x.is_null(attr)]
        return values[0] if values else null_value
    def merge(self, results, min_year, do_asr=True):
        '''
        Combine results into a single new Metadata object.

        :param results: The results to merge
        :param min_year: If truthy, used as the year of the published date,
                         otherwise the earliest pubdate in results is used
        :param do_asr: If True, set average_source_relevance on the answer
                       to the mean of relevance_in_source over results
        :return: The merged Metadata object
        '''
        ans = Metadata(_('Unknown'))
        # We assume the shortest title has the least cruft in it
        ans.title = self.length_merge('title', results, null_value=ans.title)
        # No harm in having extra authors, maybe something useful like an
        # editor or translator
        ans.authors = self.length_merge('authors', results,
                null_value=ans.authors, shortest=False)
        # We assume the shortest publisher has the least cruft in it
        ans.publisher = self.length_merge('publisher', results,
                null_value=ans.publisher)
        # We assume the smallest set of tags has the least cruft in it
        ans.tags = self.length_merge('tags', results,
                null_value=ans.tags)
        # We assume the longest series has the most info in it
        ans.series = self.length_merge('series', results,
                null_value=ans.series, shortest=False)
        # Take the series index from the result that supplied the series
        for r in results:
            if r.series and r.series == ans.series:
                ans.series_index = r.series_index
                break
        # Average the rating over all sources
        ratings = []
        for r in results:
            rating = r.rating
            if rating and rating > 0 and rating <= 5:
                ratings.append(rating)
        if ratings:
            ans.rating = sum(ratings)/len(ratings)
        # Smallest language is likely to be valid
        ans.language = self.length_merge('language', results,
                null_value=ans.language)
        # Choose longest comments
        ans.comments = self.length_merge('comments', results,
                null_value=ans.comments, shortest=False)
        # Published date
        if min_year:
            min_date = datetime(min_year, 1, 2, tzinfo=utc_tz)
            ans.pubdate = min_date
        else:
            # Year 3001 is a sentinel meaning no result had a pubdate
            min_date = datetime(3001, 1, 1, tzinfo=utc_tz)
            for r in results:
                if r.pubdate is not None and r.pubdate < min_date:
                    min_date = r.pubdate
            if min_date.year < 3000:
                ans.pubdate = min_date
        # Identifiers
        for r in results:
            ans.identifiers.update(r.identifiers)
        # Merge any other fields with no special handling (random merge)
        touched_fields = set()
        for r in results:
            if hasattr(r, 'identify_plugin'):
                touched_fields |= r.identify_plugin.touched_fields
        for f in touched_fields:
            # Skip identifiers and fields already filled in above
            if f.startswith('identifier:') or not ans.is_null(f):
                continue
            setattr(ans, f, self.random_merge(f, results,
                null_value=getattr(ans, f)))
        if do_asr:
            avg = [x.relevance_in_source for x in results]
            avg = sum(avg)/len(avg)
            ans.average_source_relevance = avg
        return ans
 def merge_identify_results(result_map, log):
    '''
    Add every result in result_map (a mapping of plugin to list of
    results) to a fresh ISBNMerge and return what its finalize() method
    produces. The log argument is not used here.
    '''
    merger = ISBNMerge()
    for _plugin, plugin_results in result_map.iteritems():
        for mi in plugin_results:
            merger.add_result(mi)
    merged = merger.finalize()
    return merged
 # }}}
 def identify(log, abort, # {{{
        title=None, authors=None, identifiers={}, timeout=30):
    start_time = time.time()
    plugins = list(metadata_plugins(['identify']))
    kwargs = {
            'title': title,
@ -54,14 +262,17 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
    log('Running identify query with parameters:')
    log(kwargs)
    log('Using plugins:', ', '.join([p.name for p in plugins]))
-    log('The log (if any) from individual plugins is below')
+    log('The log from individual plugins is below')
    workers = [Worker(p, kwargs, abort) for p in plugins]
    for w in workers:
        w.start()
    first_result_at = None
-    results = dict.fromkeys(plugins, [])
+    results = {}
    for p in plugins:
        results[p] = []
    logs = dict([(w.plugin, w.buf) for w in workers])
    def get_results():
        found = False
@ -75,6 +286,7 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
                found = True
        return found
    wait_time = msprefs['wait_after_first_identify_result']
    while True:
        time.sleep(0.2)
@ -84,24 +296,118 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
        if not is_worker_alive(workers):
            break
-        if (first_result_at is not None and time.time() - first_result_at <
+        if (first_result_at is not None and time.time() - first_result_at >
-                WAIT_AFTER_FIRST_RESULT):
+                wait_time):
            log('Not waiting any longer for more results')
            abort.set()
            break
-    get_results()
+    while not abort.is_set() and get_results():
        pass
    sort_kwargs = dict(kwargs)
    for k in list(sort_kwargs.iterkeys()):
        if k not in ('title', 'authors', 'identifiers'):
            sort_kwargs.pop(k)
-    for plugin, results in results.iteritems():
+    longest, lp = -1, ''
-        results.sort(key=plugin.identify_results_keygen(**sort_kwargs))
+    for plugin, presults in results.iteritems():
-        plog = plugin.buf.getvalue().strip()
+        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
        plog = logs[plugin].getvalue().strip()
        log('\n'+'*'*30, plugin.name, '*'*30)
        log('Request extra headers:', plugin.browser.addheaders)
        log('Found %d results'%len(presults))
        time_spent = getattr(plugin, 'dl_time_spent', None)
        if time_spent is None:
            log('Downloading was aborted')
            longest, lp = -1, plugin.name
        else:
            log('Downloading from', plugin.name, 'took', time_spent)
            if time_spent > longest:
                longest, lp = time_spent, plugin.name
        for r in presults:
            log('\n\n---')
            log(unicode(r))
        if plog:
            log('\n'+'*'*35, plugin.name, '*'*35)
            log('Found %d results'%len(results))
            log(plog)
        log('\n'+'*'*80)
        for i, result in enumerate(presults):
            result.relevance_in_source = i
            result.has_cached_cover_url = \
                plugin.get_cached_cover_url(result.identifiers) is not None
            result.identify_plugin = plugin
    log('The identify phase took %.2f seconds'%(time.time() - start_time))
    log('The longest time (%f) was taken by:'%longest, lp)
    log('Merging results from different sources and finding earliest',
            'publication dates')
    start_time = time.time()
    results = merge_identify_results(results, log)
    log('We have %d merged results, merging took: %.2f seconds' %
            (len(results), time.time() - start_time))
    if msprefs['txt_comments']:
        for r in results:
            if r.plugin.has_html_comments and r.comments:
                r.comments = html2text(r.comments)
    dummy = Metadata(_('Unknown'))
    max_tags = msprefs['max_tags']
    for r in results:
        for f in msprefs['ignore_fields']:
            setattr(r, f, getattr(dummy, f))
        r.tags = r.tags[:max_tags]
    return results
 # }}}
 if __name__ == '__main__': # tests {{{
    # To run these tests use: calibre-debug -e
    # src/calibre/ebooks/metadata/sources/identify.py
    from calibre.ebooks.metadata.sources.test import (test_identify,
            title_test, authors_test)
    def make_test(query, title, authors):
        # Pair the keyword arguments for identify() with an exact title
        # check and an authors check
        return (query, [title_test(title, exact=True), authors_test(authors)])
    tests = [
            # An e-book ISBN not on Amazon, one of the authors is
            # unknown to Amazon
            make_test(
                {'identifiers':{'isbn': '9780307459671'},
                    'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
                'The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
                ['Christopher Chabris', 'Daniel Simons']),
            # Test absence of identifiers
            make_test(
                {'title':'Learning Python',
                    'authors':['Lutz']},
                'Learning Python',
                ['Mark Lutz']),
            # Sophisticated comment formatting
            make_test(
                {'identifiers':{'isbn': '9781416580829'}},
                'Angels & Demons',
                ['Dan Brown']),
            # No ISBN
            make_test(
                {'title':'Justine', 'authors':['Durrel']},
                'Justine',
                ['Lawrence Durrel']),
            # A newer book
            make_test(
                {'identifiers':{'isbn': '9780316044981'}},
                'The Heroes',
                ['Joe Abercrombie']),
        ]
    #test_identify(tests[1:2])
    test_identify(tests)
 # }}}
--- a/src/calibre/ebooks/metadata/sources/isbndb.py
+++ b/src/calibre/ebooks/metadata/sources/isbndb.py
@ -0,0 +1,40 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from calibre.ebooks.metadata.sources.base import Source
 class ISBNDB(Source):
    '''
    Metadata source plugin for isbndb.com. Only the identify capability is
    declared.
    '''
    name = 'ISBNDB'
    description = _('Downloads metadata from isbndb.com')
    capabilities = frozenset(['identify'])
    touched_fields = frozenset(['title', 'authors',
        'identifier:isbn', 'comments', 'publisher'])
    supports_gzip_transfer_encoding = True
    def __init__(self, *args, **kwargs):
        '''
        Set up the plugin preferences and, the first time this runs, try to
        copy the access key stored under the 'IsbnDB' plugin customization
        into the 'isbndb_key' preference.
        '''
        Source.__init__(self, *args, **kwargs)
        prefs = self.prefs
        prefs.defaults['key_migrated'] = False
        prefs.defaults['isbndb_key'] = None
        if not prefs['key_migrated']:
            # Mark as migrated before trying, so that a failed migration is
            # not attempted again
            prefs['key_migrated'] = True
            try:
                from calibre.customize.ui import config
                key = config['plugin_customization']['IsbnDB']
                prefs['isbndb_key'] = key
            except Exception:
                # Migration is best effort (there may be no old key), but
                # KeyboardInterrupt/SystemExit must not be swallowed
                pass
        self.isbndb_key = prefs['isbndb_key']
--- a/src/calibre/ebooks/metadata/sources/openlibrary.py
+++ b/src/calibre/ebooks/metadata/sources/openlibrary.py
@ -26,7 +26,7 @@ class OpenLibrary(Source):
        br = self.browser
        try:
            ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read()
-            result_queue.put(ans)
+            result_queue.put((self, ans))
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                log.error('No cover for ISBN: %r found'%isbn)
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@ -14,7 +14,8 @@ from threading import Event
 from calibre.customize.ui import metadata_plugins
 from calibre import prints, sanitize_file_name2
 from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import create_log
+from calibre.ebooks.metadata.sources.base import (create_log,
        get_cached_cover_urls)
 def isbn_test(isbn):
    isbn_ = check_isbn(isbn)
@ -45,8 +46,80 @@ def authors_test(authors):
    return test
 def init_test(tdir_name):
    '''
    Create the objects shared by the test runners.

    :param tdir_name: Name used to build the log file name (spaces are
                      removed from it)
    :return: (temp dir, log file path, log object, abort Event). The log
             file is created in the system temporary directory.
    '''
    tdir = tempfile.gettempdir()
    log_name = tdir_name.replace(' ', '') + '_identify_test.txt'
    lf = os.path.join(tdir, log_name)
    log_stream = open(lf, 'wb')
    log = create_log(log_stream)
    abort = Event()
    return tdir, lf, log, abort
-def test_identify_plugin(name, tests):
+def test_identify(tests): # {{{
    '''
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.
    '''
    from calibre.ebooks.metadata.sources.identify import identify
    tdir, lf, log, abort = init_test('Full Identify')
    prints('Log saved to', lf)
    times = []
    for kwargs, test_funcs in tests:
        log('#'*80)
        log('### Running test with:', kwargs)
        log('#'*80)
        prints('Running test with:', kwargs)
        args = (log, abort)
        start_time = time.time()
        results = identify(*args, **kwargs)
        total_time = time.time() - start_time
        times.append(total_time)
        if not results:
            prints('identify failed to find any results')
            break
        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')
        for i, mi in enumerate(results):
            prints('*'*30, 'Relevance:', i, '*'*30)
            prints(mi)
            prints('\nCached cover URLs    :',
                    [x[0].name for x in get_cached_cover_urls(mi)])
            prints('*'*75, '\n\n')
        possibles = []
        for mi in results:
            test_failed = False
            for tfunc in test_funcs:
                if not tfunc(mi):
                    test_failed = True
                    break
            if not test_failed:
                possibles.append(mi)
        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            raise SystemExit(1)
        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)
        log('\n\n')
    prints('Average time per query', sum(times)/len(times))
    prints('Full log is at:', lf)
 # }}}
 def test_identify_plugin(name, tests): # {{{
    '''
    :param name: Plugin name
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
@ -61,11 +134,9 @@ def test_identify_plugin(name, tests):
            plugin = x
            break
    prints('Testing the identify function of', plugin.name)
    prints('Using extra headers:', plugin.browser.addheaders)
-    tdir = tempfile.gettempdir()
+    tdir, lf, log, abort = init_test(plugin.name)
    lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt')
    log = create_log(open(lf, 'wb'))
    abort = Event()
    prints('Log saved to', lf)
    times = []
@ -159,4 +230,5 @@ def test_identify_plugin(name, tests):
    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
 # }}}
--- a/src/calibre/ebooks/metadata/xisbn.py
+++ b/src/calibre/ebooks/metadata/xisbn.py
@ -71,14 +71,32 @@ class xISBN(object):
                ans.add(i)
        return ans
    def get_isbn_pool(self, isbn):
        '''
        Return the ISBNs and the earliest year found in the records that
        self.get_data() returns for isbn.

        :return: (isbns, min_year) where isbns is a frozenset of all values
                 found under the 'isbn' key of the records and min_year is
                 the smallest 'year' that could be converted to an int, or
                 None if there is no such year.
        '''
        data = self.get_data(isbn)
        isbns = set()
        for rec in data:
            if 'isbn' in rec:
                isbns.update(rec.get('isbn'))
        min_year = None
        for rec in data:
            try:
                year = int(rec['year'])
            except (KeyError, ValueError, TypeError):
                # Record has no year or the year is not a number
                continue
            if min_year is None or year < min_year:
                min_year = year
        return frozenset(isbns), min_year
 xisbn = xISBN()
 if __name__ == '__main__':
-    import sys
+    import sys, pprint
    isbn = sys.argv[-1]
-    print xisbn.get_data(isbn)
+    print pprint.pprint(xisbn.get_data(isbn))
    print
    print xisbn.get_associated_isbns(isbn)
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@ -102,6 +102,7 @@ class MobiMLizer(object):
    def __call__(self, oeb, context):
        oeb.logger.info('Converting XHTML to Mobipocket markup...')
        self.oeb = oeb
        self.log = self.oeb.logger
        self.opts = context
        self.profile = profile = context.dest
        self.fnums = fnums = dict((v, k) for k, v in profile.fnums.items())
@ -118,6 +119,10 @@ class MobiMLizer(object):
        del oeb.guide['cover']
        item = oeb.manifest.hrefs[href]
        if item.spine_position is not None:
            self.log.warn('Found an HTML cover,', item.href, 'removing it.',
                    'If you find some content missing from the output MOBI, it '
                    'is because you misidentified the HTML cover in the input '
                    'document')
            oeb.spine.remove(item)
            if item.media_type in OEB_DOCS:
                self.oeb.manifest.remove(item)
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -282,8 +282,8 @@ class Serializer(object):
                buffer.write('="')
                self.serialize_text(val, quot=True)
                buffer.write('"')
        if elem.text or len(elem) > 0:
        buffer.write('>')
        if elem.text or len(elem) > 0:
            if elem.text:
                self.anchor_offset = None
                self.serialize_text(elem.text)
@ -293,8 +293,6 @@ class Serializer(object):
                    self.anchor_offset = None
                    self.serialize_text(child.tail)
        buffer.write('</%s>' % tag)
        else:
            buffer.write('/>')
    def serialize_text(self, text, quot=False):
        text = text.replace('&', '&amp;')
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -17,6 +17,8 @@ from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
 from cssutils import profile as cssprofiles
 from lxml import etree
 from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
 from calibre import force_unicode
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
 from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
 from calibre.ebooks.oeb.profile import PROFILES
@ -140,8 +142,17 @@ class Stylizer(object):
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
-            if elem.tag == XHTML('style') and elem.text \
+            if (elem.tag == XHTML('style') and
-               and elem.get('type', CSS_MIME) in OEB_STYLES:
+                elem.get('type', CSS_MIME) in OEB_STYLES):
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = XHTML_CSS_NAMESPACE + elem.text
                    text = oeb.css_preprocessor(text)
                    stylesheet = parser.parseString(text, href=cssname)
--- a/src/calibre/ebooks/oeb/transforms/margins.py
+++ b/src/calibre/ebooks/oeb/transforms/margins.py
@ -1,56 +0,0 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 class RemoveFakeMargins(object):
    '''
    Try to detect and remove fake margins inserted by asinine ebook creation
    software on each paragraph/wrapper div. Can be used only after CSS
    flattening.
    '''
    def __call__(self, oeb, opts, log):
        self.oeb, self.opts, self.log = oeb, opts, log
        from calibre.ebooks.oeb.base import XPath, OEB_STYLES
        stylesheet = None
        for item in self.oeb.manifest:
            if item.media_type.lower() in OEB_STYLES:
                stylesheet = item.data
                break
        if stylesheet is None:
            return
        top_level_elements = {}
        second_level_elements = {}
        for x in self.oeb.spine:
            root = x.data
            body = XPath('//h:body')(root)
            if body:
                body = body[0]
            if not hasattr(body, 'xpath'):
                continue
            # Check for margins on top level elements
            for lb in XPath('./h:div|./h:p|./*/h:div|./*/h:p')(body):
                cls = lb.get('class', '')
                level = top_level_elements if lb.getparent() is body else \
                        second_level_elements
                if cls not in level:
                    level[cls] = []
                    top_level_elements[cls] = []
                level[cls].append(lb)
    def get_margins(self, stylesheet, cls):
        pass
--- a/src/calibre/ebooks/oeb/transforms/page_margin.py
+++ b/src/calibre/ebooks/oeb/transforms/page_margin.py
@ -11,11 +11,32 @@ from collections import Counter
 from calibre.ebooks.oeb.base import OEB_STYLES, barename, XPath
 class RemoveAdobeMargins(object):
    '''
    Strip the margin attributes from Adobe page template items in the
    manifest.
    '''
    def __call__(self, oeb, log, opts):
        '''
        Remove the margin-left/right/top/bottom attributes from every
        element that has one, in every manifest item whose media type is an
        Adobe page template.
        '''
        self.oeb, self.opts, self.log = oeb, opts, log
        template_types = ('application/vnd.adobe-page-template+xml',
                'application/vnd.adobe.page-template+xml')
        margin_attrs = ['margin-'+side for side in
                ('left', 'right', 'top', 'bottom')]
        has_margin = ('//*[@margin-bottom or @margin-top '
                'or @margin-left or @margin-right]')
        for item in self.oeb.manifest:
            if item.media_type not in template_types:
                continue
            self.log('Removing page margins specified in the'
                    ' Adobe page template')
            for elem in item.data.xpath(has_margin):
                for attr in margin_attrs:
                    elem.attrib.pop(attr, None)
 class RemoveFakeMargins(object):
    '''
    Remove left and right margins from paragraph/divs if the same margin is specified
-    on almost all the elements of at that level.
+    on almost all the elements at that level.
    Must be called only after CSS flattening
    '''
--- a/src/calibre/ebooks/pdf/fonts.cpp
+++ b/src/calibre/ebooks/pdf/fonts.cpp
@ -72,6 +72,7 @@ XMLFont::XMLFont(string* font_name, double size, GfxRGB rgb) :
        size(size-1), line_size(-1.0), italic(false), bold(false), font_name(font_name),
        font_family(NULL), color(rgb)  {
    if (!this->font_name) this->font_name = new string(DEFAULT_FONT_FAMILY);
    this->font_family = family_name(this->font_name);
    if (strcasestr(font_name->c_str(), "bold")) this->bold = true;
@ -134,7 +135,12 @@ Fonts::size_type Fonts::add_font(XMLFont *f) {
 }
 Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) {
-    XMLFont *f = new XMLFont(font_name, size, rgb);
+    XMLFont *f = NULL;
    if (font_name == NULL) 
        font_name = new string("Unknown");
        // font_name must not be deleted
    f = new XMLFont(font_name, size, rgb);
    return this->add_font(f);
 }
--- a/src/calibre/ebooks/pdf/input.py
+++ b/src/calibre/ebooks/pdf/input.py
@ -34,7 +34,7 @@ class PDFInput(InputFormatPlugin):
        from calibre.ebooks.pdf.reflow import PDFDocument
        if pdfreflow_err:
            raise RuntimeError('Failed to load pdfreflow: ' + pdfreflow_err)
-        pdfreflow.reflow(stream.read())
+        pdfreflow.reflow(stream.read(), 1, -1)
        xml = open('index.xml', 'rb').read()
        PDFDocument(xml, self.opts, self.log)
        return os.path.join(os.getcwd(), 'metadata.opf')
--- a/src/calibre/ebooks/pdf/main.cpp
+++ b/src/calibre/ebooks/pdf/main.cpp
@ -24,13 +24,14 @@ extern "C" {
    pdfreflow_reflow(PyObject *self, PyObject *args) {
        char *pdfdata;
        Py_ssize_t size;
        int first_page, last_page, num = 0;
-        if (!PyArg_ParseTuple(args, "s#", &pdfdata, &size))
+        if (!PyArg_ParseTuple(args, "s#ii", &pdfdata, &size, &first_page, &last_page))
            return NULL;
        try {
            Reflow reflow(pdfdata, static_cast<std::ifstream::pos_type>(size));
-            reflow.render();
+            num = reflow.render(first_page, last_page);
        } catch (std::exception &e) {
            PyErr_SetString(PyExc_RuntimeError, e.what()); return NULL;
        } catch (...) {
@ -38,7 +39,7 @@ extern "C" {
                    "Unknown exception raised while rendering PDF"); return NULL;
        }
-        Py_RETURN_NONE;
+        return Py_BuildValue("i", num);
    }
    static PyObject *
@ -166,8 +167,8 @@ extern "C" {
    static 
    PyMethodDef pdfreflow_methods[] = {
        {"reflow", pdfreflow_reflow, METH_VARARGS,
-        "reflow(pdf_data)\n\n"
+        "reflow(pdf_data, first_page, last_page)\n\n"
-                "Reflow the specified PDF."
+                "Reflow the specified PDF. Returns the number of pages in the PDF. If last_page is -1 renders to end of document."
        },
        {"get_metadata", pdfreflow_get_metadata, METH_VARARGS,
        "get_metadata(pdf_data, cover)\n\n"
--- a/src/calibre/ebooks/pdf/reflow.cpp
+++ b/src/calibre/ebooks/pdf/reflow.cpp
@ -712,16 +712,18 @@ Reflow::Reflow(char *pdfdata, size_t sz) :
 }
-void
+int
-Reflow::render() {
+Reflow::render(int first_page, int last_page) {
    if (!this->doc->okToCopy()) 
        cout << "Warning, this document has the copy protection flag set, ignoring." << endl;
    globalParams->setTextEncoding(encoding);
-    int first_page = 1;
+    int doc_pages = doc->getNumPages();
-    int last_page = doc->getNumPages();
+    if (last_page < 1 || last_page > doc_pages) last_page = doc_pages;
    if (first_page < 1) first_page = 1;
    if (first_page > last_page) first_page = last_page;
    XMLOutputDev *xml_out = new XMLOutputDev(this->doc);
    doc->displayPages(xml_out, first_page, last_page,
@ -733,9 +735,12 @@ Reflow::render() {
              false //Printing
    );
    if (last_page - first_page == doc_pages - 1)
        this->dump_outline();
    delete xml_out;
    return doc_pages;
 }
 void Reflow::dump_outline() {
--- a/src/calibre/ebooks/pdf/reflow.h
+++ b/src/calibre/ebooks/pdf/reflow.h
@ -66,7 +66,7 @@ class Reflow {
        ~Reflow();
        /* Convert the PDF to XML. All files are output to the current directory */
-        void render();
+        int render(int first_page, int last_page);
        /* Get the PDF Info Dictionary */
        map<string, string> get_info();
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@ -37,7 +37,7 @@ class MarkdownMLizer(object):
            if not self.opts.keep_links:
                html = re.sub(r'<\s*/*\s*a[^>]*>', '', html)
            if not self.opts.keep_image_references:
-                html = re.sub(r'<\s*img[^>]*>', '', html)\
+                html = re.sub(r'<\s*img[^>]*>', '', html)
            text = html2text(html)
--- a/src/calibre/gui2/actions/convert.py
+++ b/src/calibre/gui2/actions/convert.py
@ -51,7 +51,7 @@ class ConvertAction(InterfaceAction):
        self.queue_convert_jobs(jobs, changed, bad, rows, previous,
                self.book_auto_converted, extra_job_args=[on_card])
-    def auto_convert_mail(self, to, fmts, delete_from_library, book_ids, format):
+    def auto_convert_mail(self, to, fmts, delete_from_library, book_ids, format, subject):
        previous = self.gui.library_view.currentIndex()
        rows = [x.row() for x in \
                self.gui.library_view.selectionModel().selectedRows()]
@ -59,7 +59,7 @@ class ConvertAction(InterfaceAction):
        if jobs == []: return
        self.queue_convert_jobs(jobs, changed, bad, rows, previous,
                self.book_auto_converted_mail,
-                extra_job_args=[delete_from_library, to, fmts])
+                extra_job_args=[delete_from_library, to, fmts, subject])
    def auto_convert_news(self, book_ids, format):
        previous = self.gui.library_view.currentIndex()
@ -145,9 +145,10 @@ class ConvertAction(InterfaceAction):
        self.gui.sync_to_device(on_card, False, specific_format=fmt, send_ids=[book_id], do_auto_convert=False)
    def book_auto_converted_mail(self, job):
-        temp_files, fmt, book_id, delete_from_library, to, fmts = self.conversion_jobs[job]
+        temp_files, fmt, book_id, delete_from_library, to, fmts, subject = self.conversion_jobs[job]
        self.book_converted(job)
-        self.gui.send_by_mail(to, fmts, delete_from_library, specific_format=fmt, send_ids=[book_id], do_auto_convert=False)
+        self.gui.send_by_mail(to, fmts, delete_from_library, subject=subject,
                specific_format=fmt, send_ids=[book_id], do_auto_convert=False)
    def book_auto_converted_news(self, job):
        temp_files, fmt, book_id = self.conversion_jobs[job]
--- a/src/calibre/gui2/actions/device.py
+++ b/src/calibre/gui2/actions/device.py
@ -82,7 +82,8 @@ class ShareConnMenu(QMenu): # {{{
            keys = sorted(opts.accounts.keys())
            for account in keys:
                formats, auto, default = opts.accounts[account]
-                dest = 'mail:'+account+';'+formats
+                subject = opts.subjects.get(account, '')
                dest = 'mail:'+account+';'+formats+';'+subject
                action1 = DeviceAction(dest, False, False, I('mail.png'),
                        account)
                action2 = DeviceAction(dest, True, False, I('mail.png'),
--- a/src/calibre/gui2/actions/fetch_news.py
+++ b/src/calibre/gui2/actions/fetch_news.py
@ -5,6 +5,8 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import gc
 from PyQt4.Qt import Qt
 from calibre.gui2 import Dispatcher
@ -53,11 +55,11 @@ class FetchNewsAction(InterfaceAction):
    def scheduled_recipe_fetched(self, job):
        temp_files, fmt, arg = self.conversion_jobs.pop(job)
-        pt = temp_files[0]
+        fname = temp_files[0].name
        if job.failed:
            self.scheduler.recipe_download_failed(arg)
            return self.gui.job_exception(job)
-        id = self.gui.library_view.model().add_news(pt.name, arg)
+        id = self.gui.library_view.model().add_news(fname, arg)
        # Arg may contain a "keep_issues" variable. If it is non-zero,
        # delete all but newest x issues.
@ -81,5 +83,6 @@ class FetchNewsAction(InterfaceAction):
        self.gui.status_bar.show_message(arg['title'] + _(' fetched.'), 3000)
        self.gui.email_news(id)
        self.gui.sync_news()
        gc.collect()
--- a/src/calibre/gui2/book_details.py
+++ b/src/calibre/gui2/book_details.py
@ -270,6 +270,8 @@ class BookInfo(QWebView):
            <style type="text/css">
                body, td {background-color: transparent; font-size: %dpx; color: %s }
                a { text-decoration: none; color: blue }
                div.description { margin-top: 0; padding-top: 0; text-indent: 0 }
                table { margin-bottom: 0; padding-bottom: 0; }
            </style>
            </head>
            <body>
@ -278,9 +280,10 @@ class BookInfo(QWebView):
        <html>
        '''%(f, c)
        if self.vertical:
            extra = ''
            if comments:
-                rows += u'<tr><td colspan="2">%s</td></tr>'%comments
+                extra = u'<div class="description">%s</div>'%comments
-            self.setHtml(templ%(u'<table>%s</table>'%rows))
+            self.setHtml(templ%(u'<table>%s</table>%s'%(rows, extra)))
        else:
            left_pane = u'<table>%s</table>'%rows
            right_pane = u'<div>%s</div>'%comments
--- a/src/calibre/gui2/convert/htmlz_output.py
+++ b/src/calibre/gui2/convert/htmlz_output.py
@ -0,0 +1,26 @@
 # -*- coding: utf-8 -*-
 __license__ = 'GPL 3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'
 from calibre.gui2.convert.htmlz_output_ui import Ui_Form
 from calibre.gui2.convert import Widget
 format_model = None
 class PluginWidget(Widget, Ui_Form):
    '''
    Conversion dialog page for the HTMLZ output options.

    Both options handled here are choice options; each one is shown in the
    combo box named ``opt_<option name>`` on the form.
    '''
    TITLE = _('HTMLZ Output')
    HELP = _('Options specific to')+' HTMLZ '+_('output')
    COMMIT_NAME = 'htmlz_output'
    ICON = I('mimetypes/html.png')
    def __init__(self, parent, get_option, get_help, db=None, book_id=None):
        Widget.__init__(self, parent, ['htmlz_css_type', 'htmlz_class_style'])
        self.db = db
        self.book_id = book_id
        # Fill each combo box with the choices declared by its option before
        # initialize_options() is called.
        for name in ('htmlz_css_type', 'htmlz_class_style'):
            for choice in get_option(name).option.choices:
                getattr(self, 'opt_' + name).addItem(choice)
        self.initialize_options(get_option, get_help, db, book_id)
--- a/src/calibre/gui2/convert/htmlz_output.ui
+++ b/src/calibre/gui2/convert/htmlz_output.ui
@ -0,0 +1,61 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <ui version="4.0">
 <class>Form</class>
 <widget class="QWidget" name="Form">
  <property name="geometry">
   <rect>
    <x>0</x>
    <y>0</y>
    <width>438</width>
    <height>300</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Form</string>
  </property>
  <layout class="QGridLayout" name="gridLayout">
   <item row="2" column="0">
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
     </property>
     <property name="sizeHint" stdset="0">
      <size>
       <width>20</width>
       <height>246</height>
      </size>
     </property>
    </spacer>
   </item>
   <item row="0" column="0">
    <widget class="QLabel" name="label">
     <property name="text">
      <string>How to handle CSS</string>
     </property>
     <property name="buddy">
      <cstring>opt_htmlz_css_type</cstring>
     </property>
    </widget>
   </item>
   <item row="0" column="1">
    <widget class="QComboBox" name="opt_htmlz_css_type">
     <property name="minimumContentsLength">
      <number>20</number>
     </property>
    </widget>
   </item>
   <item row="1" column="0">
    <widget class="QLabel" name="label_2">
     <property name="text">
      <string>How to handle class based CSS</string>
     </property>
    </widget>
   </item>
   <item row="1" column="1">
    <widget class="QComboBox" name="opt_htmlz_class_style"/>
   </item>
  </layout>
 </widget>
 <resources/>
 <connections/>
 </ui>
--- a/src/calibre/gui2/device.py
+++ b/src/calibre/gui2/device.py
@ -887,9 +887,14 @@ class DeviceMixin(object): # {{{
                on_card = dest
            self.sync_to_device(on_card, delete, fmt)
        elif dest == 'mail':
-            to, fmts = sub_dest.split(';')
+            sub_dest_parts = sub_dest.split(';')
            while len(sub_dest_parts) < 3:
                sub_dest_parts.append('')
            to = sub_dest_parts[0]
            fmts = sub_dest_parts[1]
            subject = ';'.join(sub_dest_parts[2:]) 
            fmts = [x.strip().lower() for x in fmts.split(',')]
-            self.send_by_mail(to, fmts, delete)
+            self.send_by_mail(to, fmts, delete, subject=subject)
    def cover_to_thumbnail(self, data):
        if self.device_manager.device and \
--- a/src/calibre/gui2/dialogs/book_info.ui
+++ b/src/calibre/gui2/dialogs/book_info.ui
@ -7,15 +7,25 @@
    <x>0</x>
    <y>0</y>
    <width>917</width>
-    <height>480</height>
+    <height>492</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Dialog</string>
  </property>
  <property name="windowIcon">
   <iconset resource="../../../../resources/images.qrc">
    <normaloff>:/images/metadata.png</normaloff>:/images/metadata.png</iconset>
  </property>
  <layout class="QGridLayout" name="gridLayout">
   <item row="0" column="0" colspan="2">
    <widget class="QLabel" name="title">
     <property name="font">
      <font>
       <weight>75</weight>
       <bold>true</bold>
      </font>
     </property>
     <property name="text">
      <string>TextLabel</string>
     </property>
@ -24,10 +34,26 @@
     </property>
    </widget>
   </item>
-   <item row="1" column="0">
+   <item row="1" column="0" rowspan="3">
    <widget class="CoverView" name="cover"/>
   </item>
   <item row="1" column="1">
    <widget class="QScrollArea" name="scrollArea">
     <property name="frameShape">
      <enum>QFrame::NoFrame</enum>
     </property>
     <property name="widgetResizable">
      <bool>true</bool>
     </property>
     <widget class="QWidget" name="scrollAreaWidgetContents">
      <property name="geometry">
       <rect>
        <x>0</x>
        <y>0</y>
        <width>435</width>
        <height>670</height>
       </rect>
      </property>
      <layout class="QVBoxLayout" name="verticalLayout">
       <item>
        <widget class="QLabel" name="text">
@ -72,14 +98,18 @@
         </layout>
        </widget>
       </item>
-     <item>
+      </layout>
     </widget>
    </widget>
   </item>
   <item row="2" column="1">
    <widget class="QCheckBox" name="fit_cover">
     <property name="text">
      <string>Fit &amp;cover within view</string>
     </property>
    </widget>
   </item>
-     <item>
+   <item row="3" column="1">
    <layout class="QHBoxLayout" name="horizontalLayout">
     <item>
      <widget class="QPushButton" name="previous_button">
@ -106,8 +136,6 @@
    </layout>
   </item>
  </layout>
   </item>
  </layout>
 </widget>
 <customwidgets>
  <customwidget>
--- a/src/calibre/gui2/dialogs/confirm_delete.py
+++ b/src/calibre/gui2/dialogs/confirm_delete.py
@ -3,12 +3,11 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 from calibre.gui2 import dynamic
 from calibre.gui2.dialogs.confirm_delete_ui import Ui_Dialog
 from PyQt4.Qt import QDialog, Qt, QPixmap, QIcon
-def _config_name(name):
+from calibre import confirm_config_name
-    return name + '_again'
+from calibre.gui2 import dynamic
 from calibre.gui2.dialogs.confirm_delete_ui import Ui_Dialog
 class Dialog(QDialog, Ui_Dialog):
@ -22,11 +21,11 @@ class Dialog(QDialog, Ui_Dialog):
        self.buttonBox.setFocus(Qt.OtherFocusReason)
    def toggle(self, *args):
-        dynamic[_config_name(self.name)] = self.again.isChecked()
+        dynamic[confirm_config_name(self.name)] = self.again.isChecked()
 def confirm(msg, name, parent=None, pixmap='dialog_warning.png'):
-    if not dynamic.get(_config_name(name), True):
+    if not dynamic.get(confirm_config_name(name), True):
        return True
    d = Dialog(msg, name, parent)
    d.label.setPixmap(QPixmap(I(pixmap)))
--- a/src/calibre/gui2/email.py
+++ b/src/calibre/gui2/email.py
@ -22,6 +22,7 @@ from calibre.customize.ui import available_input_formats, available_output_forma
 from calibre.ebooks.metadata import authors_to_string
 from calibre.constants import preferred_encoding
 from calibre.gui2 import config, Dispatcher, warning_dialog
 from calibre.library.save_to_disk import get_components
 from calibre.utils.config import tweaks
 class EmailJob(BaseJob): # {{{
@ -210,7 +211,7 @@ class EmailMixin(object): # {{{
    def __init__(self):
        self.emailer = Emailer(self.job_manager)
-    def send_by_mail(self, to, fmts, delete_from_library, send_ids=None,
+    def send_by_mail(self, to, fmts, delete_from_library, subject='', send_ids=None,
            do_auto_convert=True, specific_format=None):
        ids = [self.library_view.model().id(r) for r in self.library_view.selectionModel().selectedRows()] if send_ids is None else send_ids
        if not ids or len(ids) == 0:
@ -239,7 +240,14 @@ class EmailMixin(object): # {{{
                remove_ids.append(id)
                jobnames.append(t)
                attachments.append(f)
                if not subject:
                    subjects.append(_('E-book:')+ ' '+t)
                else:
                    components = get_components(subject, mi, id)
                    if not components:
                        components = [mi.title]
                    subject = os.path.join(*components)
                    subjects.append(subject)
                a = authors_to_string(mi.authors if mi.authors else \
                        [_('Unknown')])
                texts.append(_('Attached, you will find the e-book') + \
@ -292,7 +300,7 @@ class EmailMixin(object): # {{{
                if self.auto_convert_question(
                    _('Auto convert the following books before sending via '
                        'email?'), autos):
-                    self.iactions['Convert Books'].auto_convert_mail(to, fmts, delete_from_library, auto, format)
+                    self.iactions['Convert Books'].auto_convert_mail(to, fmts, delete_from_library, auto, format, subject)
        if bad:
            bad = '\n'.join('%s'%(i,) for i in bad)
--- a/src/calibre/gui2/filename_pattern.ui
+++ b/src/calibre/gui2/filename_pattern.ui
@ -206,6 +206,46 @@
              </property>
             </widget>
            </item>
            <item row="5" column="0">
             <widget class="QLabel" name="label_8">
              <property name="text">
               <string>Publisher:</string>
              </property>
             </widget>
            </item>
            <item row="5" column="1">
             <widget class="QLineEdit" name="publisher">
              <property name="toolTip">
               <string>Regular expression (?P&lt;publisher&gt;)</string>
              </property>
              <property name="text">
               <string>No match</string>
              </property>
              <property name="readOnly">
               <bool>true</bool>
              </property>
             </widget>
            </item>
            <item row="6" column="0">
             <widget class="QLabel" name="label_9">
              <property name="text">
               <string>Published:</string>
              </property>
             </widget>
            </item>
            <item row="6" column="1">
             <widget class="QLineEdit" name="pubdate">
              <property name="toolTip">
               <string>Regular expression (?P&lt;published&gt;)</string>
              </property>
              <property name="text">
               <string>No match</string>
              </property>
              <property name="readOnly">
               <bool>true</bool>
              </property>
             </widget>
            </item>
           </layout>
          </widget>
         </widget>
--- a/src/calibre/gui2/library/models.py
+++ b/src/calibre/gui2/library/models.py
@ -7,7 +7,6 @@ __docformat__ = 'restructuredtext en'
 import shutil, functools, re, os, traceback
 from contextlib import closing
 from operator import attrgetter
 from PyQt4.Qt import QAbstractTableModel, Qt, pyqtSignal, QIcon, QImage, \
        QModelIndex, QVariant, QDate, QColor
@ -18,7 +17,7 @@ from calibre.ebooks.metadata import fmt_sidx, authors_to_string, string_to_autho
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.utils.config import tweaks, prefs
 from calibre.utils.date import dt_factory, qt_to_dt, isoformat
-from calibre.utils.icu import sort_key, strcmp as icu_strcmp
+from calibre.utils.icu import sort_key
 from calibre.ebooks.metadata.meta import set_metadata as _set_metadata
 from calibre.utils.search_query_parser import SearchQueryParser
 from calibre.library.caches import _match, CONTAINS_MATCH, EQUALS_MATCH, \
@ -984,6 +983,21 @@ class OnDeviceSearch(SearchQueryParser): # {{{
 # }}}
 class DeviceDBSortKeyGen(object): # {{{
    '''
    Callable that produces the sort key for one entry of a device book list.

    :param attr: name of the attribute to read from each book
    :param keyfunc: callable that turns the attribute value into a sort key
    :param db: indexable collection of books; ``db[x]`` is the book for ``x``

    Calling the instance with an index returns
    ``keyfunc(getattr(db[x], attr))``. If the lookup or the conversion fails,
    ``None`` is returned instead, so a single bad entry cannot abort a sort.
    '''
    def __init__(self, attr, keyfunc, db):
        self.attr = attr
        self.db = db
        self.keyfunc = keyfunc
    def __call__(self, x):
        try:
            ans = self.keyfunc(getattr(self.db[x], self.attr))
        except Exception:
            # Only ordinary errors (bad index, missing attribute, failed
            # conversion) fall back to None. KeyboardInterrupt and SystemExit
            # are not Exception subclasses and must propagate.
            ans = None
        return ans
 # }}}
 class DeviceBooksModel(BooksModel): # {{{
    booklist_dirtied = pyqtSignal()
@ -1089,59 +1103,40 @@ class DeviceBooksModel(BooksModel): # {{{
    def sort(self, col, order, reset=True):
        descending = order != Qt.AscendingOrder
        def strcmp(attr):
            ag = attrgetter(attr)
            def _strcmp(x, y):
                x = ag(self.db[x])
                y = ag(self.db[y])
                if x == None:
                    x = ''
                if y == None:
                    y = ''
                return icu_strcmp(x.strip(), y.strip())
            return _strcmp
        def datecmp(x, y):
            x = self.db[x].datetime
            y = self.db[y].datetime
            return cmp(dt_factory(x, assume_utc=True), dt_factory(y,
                assume_utc=True))
        def sizecmp(x, y):
            x, y = int(self.db[x].size), int(self.db[y].size)
            return cmp(x, y)
        def tagscmp(x, y):
            x = ','.join(sorted(getattr(self.db[x], 'device_collections', []),key=sort_key))
            y = ','.join(sorted(getattr(self.db[y], 'device_collections', []),key=sort_key))
            return cmp(x, y)
        def libcmp(x, y):
            x, y = self.db[x].in_library, self.db[y].in_library
            return cmp(x, y)
        def authorcmp(x, y):
            ax = getattr(self.db[x], 'author_sort', None)
            ay = getattr(self.db[y], 'author_sort', None)
            if ax and ay:
                x = ax
                y = ay
            else:
                x, y = authors_to_string(self.db[x].authors), \
                                authors_to_string(self.db[y].authors)
            return cmp(x, y)
        cname = self.column_map[col]
-        fcmp = {
+        def author_key(x):
-                'title': strcmp('title_sorter'),
+            try:
-                'authors' : authorcmp,
+                ax = self.db[x].author_sort
-                'size' : sizecmp,
+                if not ax:
-                'timestamp': datecmp,
+                    raise Exception('')
-                'collections': tagscmp,
+            except:
-                'inlibrary': libcmp,
+                try:
                    ax = authors_to_string(self.db[x].authors)
                except:
                    ax = ''
            return ax
        keygen = {
                'title': ('title_sorter', lambda x: sort_key(x) if x else ''),
                'authors' : author_key,
                'size' : ('size', int),
                'timestamp': ('datetime', functools.partial(dt_factory, assume_utc=True)),
                'collections': ('device_collections', lambda x:sorted(x,
                    key=sort_key)),
                'inlibrary': ('in_library', lambda x: x),
                }[cname]
-        self.map.sort(cmp=fcmp, reverse=descending)
+        keygen = keygen if callable(keygen) else DeviceDBSortKeyGen(
            keygen[0], keygen[1], self.db)
        self.map.sort(key=keygen, reverse=descending)
        if len(self.map) == len(self.db):
            self.sorted_map = list(self.map)
        else:
            self.sorted_map = list(range(len(self.db)))
-            self.sorted_map.sort(cmp=fcmp, reverse=descending)
+            self.sorted_map.sort(key=keygen, reverse=descending)
        self.sorted_on = (self.column_map[col], order)
        self.sort_history.insert(0, self.sorted_on)
        if hasattr(keygen, 'db'):
            keygen.db = None
        if reset:
            self.reset()
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@ -7,10 +9,10 @@ __docformat__ = 'restructuredtext en'
 import textwrap, re, os
-from PyQt4.Qt import Qt, QDateEdit, QDate, \
+from PyQt4.Qt import (Qt, QDateEdit, QDate,
-    QIcon, QToolButton, QWidget, QLabel, QGridLayout, \
+    QIcon, QToolButton, QWidget, QLabel, QGridLayout,
-    QDoubleSpinBox, QListWidgetItem, QSize, QPixmap, \
+    QDoubleSpinBox, QListWidgetItem, QSize, QPixmap,
-    QPushButton, QSpinBox, QLineEdit
+    QPushButton, QSpinBox, QLineEdit, QSizePolicy)
 from calibre.gui2.widgets import EnLineEdit, FormatList, ImageView
 from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
@ -22,7 +24,7 @@ from calibre.ebooks.metadata.meta import get_metadata
 from calibre.gui2 import file_icon_provider, UNDEFINED_QDATE, UNDEFINED_DATE, \
        choose_files, error_dialog, choose_images, question_dialog
 from calibre.utils.date import local_tz, qt_to_dt
-from calibre import strftime
+from calibre import strftime, fit_image
 from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.customize.ui import run_plugins_on_import
 from calibre.utils.date import utcfromtimestamp
@ -480,6 +482,7 @@ class FormatsManager(QWidget): # {{{
    def initialize(self, db, id_):
        self.changed = False
        self.formats.clear()
        exts = db.formats(id_, index_is_id=True)
        self.original_val = set([])
        if exts:
@ -638,6 +641,23 @@ class Cover(ImageView): # {{{
                self.trim_cover_button, self.download_cover_button,
                self.generate_cover_button]
        self.frame_size = (300, 400)
        self.setSizePolicy(QSizePolicy(QSizePolicy.Preferred,
            QSizePolicy.Preferred))
    def frame_resized(self, ev):
        '''
        Record a new bounding box for the cover from a resize notification.

        ``ev.size()`` must return an object with ``width()`` and ``height()``.
        The stored ``frame_size`` is a third of that width (floor division)
        and the full height.
        '''
        new_size = ev.size()
        width_limit = new_size.width() // 3
        self.frame_size = (width_limit, new_size.height())
    def sizeHint(self):
        '''
        Return the ImageView size hint, replaced by the dimensions computed
        by fit_image() whenever fit_image() reports that the hint had to be
        resized for ``self.frame_size``.
        '''
        hint = ImageView.sizeHint(self)
        hint_w, hint_h = hint.width(), hint.height()
        limit_w, limit_h = self.frame_size[0], self.frame_size[1]
        # NOTE(review): fit_image presumably scales the hint to fit inside
        # the frame while keeping the aspect ratio - confirm in calibre.
        changed, fit_w, fit_h = fit_image(hint_w, hint_h, limit_w, limit_h)
        return QSize(fit_w, fit_h) if changed else hint
    def select_cover(self, *args):
        files = choose_images(self, 'change cover dialog',
                             _('Choose cover for ') +
@ -882,8 +902,11 @@ class TagsEdit(MultiCompleteLineEdit): # {{{
 # }}}
-class ISBNEdit(QLineEdit): # {{{
+class IdentifiersEdit(QLineEdit): # {{{
-    LABEL = _('IS&BN:')
+    LABEL = _('I&ds:')
    BASE_TT = _('Edit the identifiers for this book. '
            'For example: \n\n%s')%(
            'isbn:1565927249, doi:10.1000/182, amazon:1565927249')
    def __init__(self, parent):
        QLineEdit.__init__(self, parent)
@ -893,32 +916,44 @@ class ISBNEdit(QLineEdit): # {{{
    @dynamic_property
    def current_val(self):
        def fget(self):
-            return self.pat.sub('', unicode(self.text()).strip())
+            raw = unicode(self.text()).strip()
            parts = [x.strip() for x in raw.split(',')]
            ans = {}
            for x in parts:
                c = x.split(':')
                if len(c) == 2:
                    ans[c[0]] = c[1]
            return ans
        def fset(self, val):
            if not val:
-                val = ''
+                val = {}
-            self.setText(val.strip())
+            txt = ', '.join(['%s:%s'%(k, v) for k, v in val.iteritems()])
            self.setText(txt.strip())
        return property(fget=fget, fset=fset)
    def initialize(self, db, id_):
-        self.current_val = db.isbn(id_, index_is_id=True)
+        self.current_val = db.get_identifiers(id_, index_is_id=True)
        self.original_val = self.current_val
    def commit(self, db, id_):
-        db.set_isbn(id_, self.current_val, notify=False, commit=False)
+        if self.original_val != self.current_val:
            db.set_identifiers(id_, self.current_val, notify=False, commit=False)
        return True
    def validate(self, *args):
-        isbn = self.current_val
+        identifiers = self.current_val
-        tt = _('This ISBN number is valid')
+        isbn = identifiers.get('isbn', '')
        tt = self.BASE_TT
        extra = ''
        if not isbn:
            col = 'rgba(0,255,0,0%)'
        elif check_isbn(isbn) is not None:
            col = 'rgba(0,255,0,20%)'
            extra = '\n\n'+_('This ISBN number is valid')
        else:
            col = 'rgba(255,0,0,20%)'
-            tt = _('This ISBN number is invalid')
+            extra = '\n\n' + _('This ISBN number is invalid')
-        self.setToolTip(tt)
+        self.setToolTip(tt+extra)
        self.setStyleSheet('QLineEdit { background-color: %s }'%col)
 # }}}
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@ -8,17 +10,17 @@ __docformat__ = 'restructuredtext en'
 import os
 from functools import partial
-from PyQt4.Qt import Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton, \
+from PyQt4.Qt import (Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton,
-        QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont, \
+        QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont,
-        QTabWidget, QIcon, QToolButton, QSplitter, QGroupBox, QSpacerItem, \
+        QTabWidget, QIcon, QToolButton, QSplitter, QGroupBox, QSpacerItem,
-        QSizePolicy, QPalette, QFrame, QSize, QKeySequence
+        QSizePolicy, QPalette, QFrame, QSize, QKeySequence)
 from calibre.ebooks.metadata import authors_to_string, string_to_authors
 from calibre.gui2 import ResizableDialog, error_dialog, gprefs
-from calibre.gui2.metadata.basic_widgets import TitleEdit, AuthorsEdit, \
+from calibre.gui2.metadata.basic_widgets import (TitleEdit, AuthorsEdit,
-    AuthorSortEdit, TitleSortEdit, SeriesEdit, SeriesIndexEdit, ISBNEdit, \
+    AuthorSortEdit, TitleSortEdit, SeriesEdit, SeriesIndexEdit, IdentifiersEdit,
-    RatingEdit, PublisherEdit, TagsEdit, FormatsManager, Cover, CommentsEdit, \
+    RatingEdit, PublisherEdit, TagsEdit, FormatsManager, Cover, CommentsEdit,
-    BuddyLabel, DateEdit, PubdateEdit
+    BuddyLabel, DateEdit, PubdateEdit)
 from calibre.gui2.custom_column_widgets import populate_metadata_page
 from calibre.utils.config import tweaks
@ -145,8 +147,8 @@ class MetadataSingleDialogBase(ResizableDialog):
        self.tags_editor_button.clicked.connect(self.tags_editor)
        self.basic_metadata_widgets.append(self.tags)
-        self.isbn = ISBNEdit(self)
+        self.identifiers = IdentifiersEdit(self)
-        self.basic_metadata_widgets.append(self.isbn)
+        self.basic_metadata_widgets.append(self.identifiers)
        self.publisher = PublisherEdit(self)
        self.basic_metadata_widgets.append(self.publisher)
@ -280,8 +282,8 @@ class MetadataSingleDialogBase(ResizableDialog):
            self.publisher.current_val = mi.publisher
        if not mi.is_null('tags'):
            self.tags.current_val = mi.tags
-        if not mi.is_null('isbn'):
+        if not mi.is_null('identifiers'):
-            self.isbn.current_val = mi.isbn
+            self.identifiers.current_val = mi.identifiers
        if not mi.is_null('pubdate'):
            self.pubdate.current_val = mi.pubdate
        if not mi.is_null('series') and mi.series.strip():
@ -385,6 +387,14 @@ class MetadataSingleDialogBase(ResizableDialog):
                disconnect(x.clicked)
    # }}}
 class Splitter(QSplitter):
    '''
    QSplitter that re-emits every resize event it receives through the
    ``frame_resized`` signal, passing the event object along.
    '''
    frame_resized = pyqtSignal(object)
    def resizeEvent(self, ev):
        # Emit the signal first, then hand the event to the base class.
        self.frame_resized.emit(ev)
        base_result = QSplitter.resizeEvent(self, ev)
        return base_result
 class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
    def do_layout(self):
@ -437,8 +447,9 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
        tl.addWidget(self.formats_manager, 0, 6, 3, 1)
-        self.splitter = QSplitter(Qt.Horizontal, self)
+        self.splitter = Splitter(Qt.Horizontal, self)
        self.splitter.addWidget(self.cover)
        self.splitter.frame_resized.connect(self.cover.frame_resized)
        l.addWidget(self.splitter)
        self.tabs[0].gb = gb = QGroupBox(_('Change cover'), self)
        gb.l = l = QGridLayout()
@ -475,9 +486,9 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
        create_row2(1, self.rating)
        sto(self.rating, self.tags)
        create_row2(2, self.tags, self.tags_editor_button)
-        sto(self.tags_editor_button, self.isbn)
+        sto(self.tags_editor_button, self.identifiers)
-        create_row2(3, self.isbn)
+        create_row2(3, self.identifiers)
-        sto(self.isbn, self.timestamp)
+        sto(self.identifiers, self.timestamp)
        create_row2(4, self.timestamp, self.timestamp.clear_button)
        sto(self.timestamp.clear_button, self.pubdate)
        create_row2(5, self.pubdate, self.pubdate.clear_button)
@ -562,9 +573,9 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{
        create_row(8, self.pubdate, self.publisher,
                   button=self.pubdate.clear_button, icon='trash.png')
        create_row(9, self.publisher, self.timestamp)
-        create_row(10, self.timestamp, self.isbn,
+        create_row(10, self.timestamp, self.identifiers,
                   button=self.timestamp.clear_button, icon='trash.png')
-        create_row(11, self.isbn, self.comments)
+        create_row(11, self.identifiers, self.comments)
        tl.addItem(QSpacerItem(1, 1, QSizePolicy.Fixed, QSizePolicy.Expanding),
                   12, 1, 1 ,1)
@ -580,7 +591,7 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{
            sr.setWidget(w)
            gbl.addWidget(sr)
            self.tabs[0].l.addWidget(gb, 0, 1, 1, 1)
-            sto(self.isbn, gb)
+            sto(self.identifiers, gb)
        w = QGroupBox(_('&Comments'), tab0)
        sp = QSizePolicy()
--- a/src/calibre/gui2/preferences/emailp.py
+++ b/src/calibre/gui2/preferences/emailp.py
@ -5,6 +5,8 @@ __license__   = 'GPL v3'
 __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import textwrap
 from PyQt4.Qt import QAbstractTableModel, QVariant, QFont, Qt
@ -17,25 +19,30 @@ from calibre.utils.smtp import config as smtp_prefs
 class EmailAccounts(QAbstractTableModel): # {{{
-    def __init__(self, accounts):
+    def __init__(self, accounts, subjects):
        QAbstractTableModel.__init__(self)
        self.accounts = accounts
        self.subjects = subjects
        self.account_order = sorted(self.accounts.keys())
-        self.headers  = map(QVariant, [_('Email'), _('Formats'), _('Auto send')])
+        self.headers  = map(QVariant, [_('Email'), _('Formats'), _('Subject'), _('Auto send')])
        self.default_font = QFont()
        self.default_font.setBold(True)
        self.default_font = QVariant(self.default_font)
-        self.tooltips =[NONE] + map(QVariant,
+        self.tooltips =[NONE] + list(map(QVariant, map(textwrap.fill,
            [_('Formats to email. The first matching format will be sent.'),
             _('Subject of the email to use when sending. When left blank '
               'the title will be used for the subject. Also, the same '
               'templates used for "Save to disk" such as {title} and '
               '{author_sort} can be used here.'),
             '<p>'+_('If checked, downloaded news will be automatically '
                     'mailed <br>to this email address '
-                     '(provided it is in one of the listed formats).')])
+                     '(provided it is in one of the listed formats).')])))
    def rowCount(self, *args):
        return len(self.account_order)
    def columnCount(self, *args):
-        return 3
+        return len(self.headers)
    def headerData(self, section, orientation, role):
        if role == Qt.DisplayRole and orientation == Qt.Horizontal:
@ -56,14 +63,16 @@ class EmailAccounts(QAbstractTableModel): # {{{
                return QVariant(account)
            if col ==  1:
                return QVariant(self.accounts[account][0])
            if col == 2:
                return QVariant(self.subjects.get(account, ''))
        if role == Qt.FontRole and self.accounts[account][2]:
            return self.default_font
-        if role == Qt.CheckStateRole and col == 2:
+        if role == Qt.CheckStateRole and col == 3:
            return QVariant(Qt.Checked if self.accounts[account][1] else Qt.Unchecked)
        return NONE
    def flags(self, index):
-        if index.column() == 2:
+        if index.column() == 3:
            return QAbstractTableModel.flags(self, index)|Qt.ItemIsUserCheckable
        else:
            return QAbstractTableModel.flags(self, index)|Qt.ItemIsEditable
@ -73,8 +82,10 @@ class EmailAccounts(QAbstractTableModel): # {{{
            return False
        row, col = index.row(), index.column()
        account = self.account_order[row]
-        if col == 2:
+        if col == 3:
            self.accounts[account][1] ^= True
        if col == 2:
            self.subjects[account] = unicode(value.toString())
        elif col == 1:
            self.accounts[account][0] = unicode(value.toString()).upper()
        else:
@ -143,7 +154,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        self.send_email_widget.initialize(self.preferred_to_address)
        self.send_email_widget.changed_signal.connect(self.changed_signal.emit)
        opts = self.send_email_widget.smtp_opts
-        self._email_accounts = EmailAccounts(opts.accounts)
+        self._email_accounts = EmailAccounts(opts.accounts, opts.subjects)
        self._email_accounts.dataChanged.connect(lambda x,y:
                self.changed_signal.emit())
        self.email_view.setModel(self._email_accounts)
@ -170,6 +181,7 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
        if not self.send_email_widget.set_email_settings(to_set):
            raise AbortCommit('abort')
        self.proxy['accounts'] =  self._email_accounts.accounts
        self.proxy['subjects'] = self._email_accounts.subjects
        return ConfigWidgetBase.commit(self)
--- a/src/calibre/gui2/search_box.py
+++ b/src/calibre/gui2/search_box.py
@ -109,7 +109,7 @@ class SearchBox2(QComboBox): # {{{
    def normalize_state(self):
        self.setToolTip(self.tool_tip_text)
        self.line_edit.setStyleSheet(
-            'QLineEdit{color:black;background-color:%s;}' % self.normal_background)
+            'QLineEdit{color:none;background-color:%s;}' % self.normal_background)
    def text(self):
        '''Return whatever ``self.currentText()`` reports for this widget.'''
        current = self.currentText()
        return current
--- a/src/calibre/gui2/viewer/documentview.py
+++ b/src/calibre/gui2/viewer/documentview.py
@ -171,10 +171,11 @@ class Document(QWebPage): # {{{
            self.misc_config()
            self.after_load()
-    def __init__(self, shortcuts, parent=None):
+    def __init__(self, shortcuts, parent=None, resize_callback=lambda: None):
        QWebPage.__init__(self, parent)
        self.setObjectName("py_bridge")
        self.debug_javascript = False
        self.resize_callback = resize_callback
        self.current_language = None
        self.loaded_javascript = False
@ -237,6 +238,12 @@ class Document(QWebPage): # {{{
        if self.loaded_javascript:
            return
        self.loaded_javascript = True
        self.javascript(
            '''
            window.onresize = function(event) {
                window.py_bridge.window_resized();
            }
            ''')
        if jquery is None:
            jquery = P('content_server/jquery.js', data=True)
        self.javascript(jquery)
@ -298,6 +305,10 @@ class Document(QWebPage): # {{{
    def debug(self, msg):
        prints(msg)
    @pyqtSignature('')
    def window_resized(self):
        '''Forward a resize notification to the ``resize_callback`` that was
        stored on this object. The callback is invoked with no arguments and
        its result is discarded.'''
        notify = self.resize_callback
        notify()
    def reference_mode(self, enable):
        self.javascript(('enter' if enable else 'leave')+'_reference_mode()')
@ -424,12 +435,19 @@ class Document(QWebPage): # {{{
    def xpos(self):
        return self.mainFrame().scrollPosition().x()
-    @property
+    @dynamic_property
    def scroll_fraction(self):
        '''Build and return the property object for the vertical scroll
        position expressed as a fraction of ``height - window_height``.

        Reading divides ``ypos`` by that range and yields ``0.`` when the
        range is zero. Writing multiplies the given fraction by the range,
        clamps negative results to 0 and scrolls there, keeping the current
        horizontal position.'''
        def read_fraction(self):
            try:
                offset = float(self.ypos)
                scrollable = self.height - self.window_height
                return offset/scrollable
            except ZeroDivisionError:
                # Nothing to scroll: content height equals window height
                return 0.

        def write_fraction(self, val):
            target = val * (self.height - self.window_height)
            if target < 0:
                target = 0
            self.scroll_to(x=self.xpos, y=target)

        return property(fget=read_fraction, fset=write_fraction)
    @property
    def hscroll_fraction(self):
@ -493,7 +511,8 @@ class DocumentView(QWebView): # {{{
        self._size_hint = QSize(510, 680)
        self.initial_pos = 0.0
        self.to_bottom = False
-        self.document = Document(self.shortcuts, parent=self)
+        self.document = Document(self.shortcuts, parent=self,
                resize_callback=self.viewport_resized)
        self.setPage(self.document)
        self.manager = None
        self._reference_mode = False
@ -630,9 +649,13 @@ class DocumentView(QWebView): # {{{
    def sizeHint(self):
        '''Return the size stored in ``self._size_hint``.'''
        hint = self._size_hint
        return hint
-    @property
+    @dynamic_property
    def scroll_fraction(self):
        '''Build and return the property object that proxies
        ``self.document.scroll_fraction``. Values assigned through it are
        converted with ``float()`` before being handed to the document.'''
        def read_fraction(self):
            document = self.document
            return document.scroll_fraction

        def write_fraction(self, val):
            fraction = float(val)
            self.document.scroll_fraction = fraction

        return property(fget=read_fraction, fset=write_fraction)
    @property
    def hscroll_fraction(self):
@ -968,9 +991,11 @@ class DocumentView(QWebView): # {{{
    def resizeEvent(self, event):
        '''Let ``QWebView`` handle the resize event, then schedule
        ``initialize_scrollbar`` and return the base class result.'''
        result = QWebView.resizeEvent(self, event)
        # initialize_scrollbar is not called directly; it is queued on a
        # 10 ms single-shot timer.
        QTimer.singleShot(10, self.initialize_scrollbar)
        return result
    def viewport_resized(self):
        '''Tell the manager, if one is set, that the viewport was resized.

        The manager's ``viewport_resized`` is called with the current
        ``scroll_fraction``. Nothing happens while ``self.manager`` is None.
        This method is passed to ``Document`` as its ``resize_callback``, so
        it has no meaningful return value.'''
        # No ``return ret`` here: ``ret`` is never bound in this method, so
        # returning it would raise NameError on every call.
        if self.manager is not None:
            self.manager.viewport_resized(self.scroll_fraction)
    def event(self, ev):
        typ = ev.type()
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@ -240,7 +240,7 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
        self.connect(self.action_reference_mode, SIGNAL('triggered(bool)'),
                     lambda x: self.view.reference_mode(x))
        self.connect(self.action_metadata, SIGNAL('triggered(bool)'), lambda x:self.metadata.setVisible(x))
-        self.connect(self.action_table_of_contents, SIGNAL('toggled(bool)'), lambda x:self.toc.setVisible(x))
+        self.action_table_of_contents.toggled[bool].connect(self.set_toc_visible)
        self.connect(self.action_copy, SIGNAL('triggered(bool)'), self.copy)
        self.connect(self.action_font_size_larger, SIGNAL('triggered(bool)'),
                     self.font_size_larger)
@ -310,6 +310,9 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
        self.restore_state()
    def set_toc_visible(self, yes):
        '''Show or hide ``self.toc`` by passing ``yes`` straight through to
        its ``setVisible``.'''
        toc_widget = self.toc
        toc_widget.setVisible(yes)
    def clear_recent_history(self, *args):
        vprefs.set('viewer_open_history', [])
        self.build_recent_menu()
--- a/src/calibre/gui2/widgets.py
+++ b/src/calibre/gui2/widgets.py
@ -121,6 +121,12 @@ class FilenamePattern(QWidget, Ui_Form):
        else:
            self.series_index.setText(_('No match'))
        if mi.publisher:
            self.publisher.setText(mi.publisher)
        if mi.pubdate:
            self.pubdate.setText(mi.pubdate.strftime('%Y-%m-%d'))
        self.isbn.setText(_('No match') if mi.isbn is None else str(mi.isbn))
@ -306,6 +312,7 @@ class ImageView(QWidget, ImageDropMixin):
        p.setPen(pen)
        if self.draw_border:
            p.drawRect(target)
        #p.drawRect(self.rect())
        p.end()
 class CoverView(QGraphicsView, ImageDropMixin):
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -30,7 +30,7 @@ entry_points = {
             'calibre-customize  = calibre.customize.ui:main',
             'calibre-complete   = calibre.utils.complete:main',
             'pdfmanipulate      = calibre.ebooks.pdf.manipulate.cli:main',
-             'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
+             'fetch-ebook-metadata = calibre.ebooks.metadata.sources.cli:main',
             'epub-fix           = calibre.ebooks.epub.fix.main:main',
             'calibre-smtp = calibre.utils.smtp:main',
        ],
@ -136,17 +136,17 @@ class PostInstall:
        self.icon_resources = []
        self.menu_resources = []
        self.mime_resources = []
-        if islinux:
+        if islinux or isfreebsd:
            self.setup_completion()
        self.install_man_pages()
-        if islinux:
+        if islinux or isfreebsd:
            self.setup_desktop_integration()
        self.create_uninstaller()
        from calibre.utils.config import config_dir
        if os.path.exists(config_dir):
            os.chdir(config_dir)
-            if islinux:
+            if islinux or isfreebsd:
                for f in os.listdir('.'):
                    if os.stat(f).st_uid == 0:
                        os.rmdir(f) if os.path.isdir(f) else os.unlink(f)
@ -183,7 +183,7 @@ class PostInstall:
            from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop
            from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop
            from calibre.gui2.viewer.main import option_parser as viewer_op
-            from calibre.ebooks.metadata.fetch import option_parser as fem_op
+            from calibre.ebooks.metadata.sources.cli import option_parser as fem_op
            from calibre.gui2.main import option_parser as guiop
            from calibre.utils.smtp import option_parser as smtp_op
            from calibre.library.server.main import option_parser as serv_op
--- a/Show More
+++ b/Show More