Merge

2025-07-09 03:04:10 -04:00 · 2011-04-17 07:53:00 +02:00 · 2011-04-17 07:53:00 +02:00 · 4f8b48a41c
commit 4f8b48a41c
parent 1c471dfc5e 3b78209a8d
244 changed files with 121233 additions and 57989 deletions
--- a/Changelog.yaml
+++ b/Changelog.yaml
@ -19,6 +19,142 @@
 #  new recipes:
 #    - title: 

+- version: 0.7.55
+  date: 2011-04-15
+
+  new features:
+    - title: "Add a menu bar. Useful if you use a lot of plugins and are running out of space in your toolbars. By default the menu bar is hidden (except on OS X). You can add actions to it via Preferences->Toolbars. As soon as you add actions, it will become visible."
+
+    - title: "OS X: Make the main calibre window look a little more 'native' on OS X"
+
+    - title: "Show recently viewed books in the View button's drop down menu"
+
+    - title: "Add a button next to the search bar to toggle easily between highlight and restrict search modes"
+
+    - title: "Allow the use of arbitrary searches as search restrictions, rather than just saved searches. Do this by using the special entry '*Current Search' in the Search Restriction dropdown."
+
+    - title: "The Connect/share icon now changes color to indicate that the content server is running"
+      tickets: [755444]
+
+    - title: "Device drivers for Viewpad 7, Motorola Xoom and Asus Eee Note"
+
+    - title: "Add tags like composite custom column."
+      tickets: [759663]
+
+    - title: "Add a new date format code 'iso'. Permits formatting dates to see the complete time (via Preferences->Tweaks)"
+
+    - title: "Allow the use of data from the size column in the template language"
+      tickets: [759645]
+
+    - title: "Support reading/writing covers to txtz/htmlz files"
+
+    - title: "Speedup for large library sorting when using composite custom columns"
+
+    - title: "Move the boolean columns are tristate tweak to Preferences->Behavior"
+  
+  bug fixes:
+    - title: "Fix a regression in 0.7.54 that broke reading covers/metadata from cbz files."
+      tickets: [756892]
+
+    - title: "Fix tweak names and help not translatable"
+      tickets: [756736]
+
+    - title: "When the size of a book is less than 0.1MB but not zero, display the size as <0.1 instead of 0.0."
+      tickets: [755768]
+
+    - title: "HTMLZ input: Fix handling of HTML files encoded in an encoding other than UTF-8"
+
+    - title: "EPUB Input: Fix EPUB files with empty Adobe PAGE templates causing conversion to abort."
+      tickets: [760390]
+
+    - title: "Fix CHM input plugin not closing opened input file"
+      tickets: [760589]
+
+    - title: "MOBI Output: Make super/subscripts use a slightly smaller font when rendered on a Kindle. Also allow the use of vertical-align:top/bottom in the CSS to specify a super/subscript."
+      tickets: [758667]
+
+    - title: "LRF Input: Detect and workaround LRF files that have deeply nested spans, instead of crashing."
+      tickets: [759680]
+
+    - title: "MOBI Output: Fix bug that would cause conversion to unnecessarily abort when malformed hyperlinks are present in the input document."
+      tickets: [759313]
+
+    - title: "Make true and false searches work correctly for numeric fields."
+
+    - title: "MOBI Output: The Ignore margins setting no longer ignores blockquotes, only margins set via CSS on other elements."
+      tickets: [758675]
+
+    - title: "Fix regression that caused clicking auto send to also change the email address in Preferences->Email"
+
+  improved recipes:
+    - Wall Street Journal
+    - Weblogs SL
+    - Tabu.ro
+    - Vecernje Novosti
+
+  new recipes:
+    - title: Hallo Assen and Dvhn
+      author: Reijndert
+
+
+- version: 0.7.54
+  date: 2011-04-08
+
+  new features:
+    - title: "New output format, HTMLZ which is a single HTML file with its associated images/stylesheets in a zipped up file"
+      description: "Useful when you want to convert your ebook into a single HTML file for easy editing. Note that this output plugin is still new and needs testing"
+
+    - title: "When dealing with ZIP/RAR archives, use the file header rather than the file extension to determine the file type, when possible. This fixes the common case of CBZ files being actually cbr files and vice versa"
+
+    - title: "Support for the Motorola Atrix"
+
+    - title: "Allow the icons in the toolbar to be turned off completely via Preferences->Look & Feel"
+
+    - title: "When downloading metadata use the gzip transfer encoding when possible for a speedup."
+      tickets: [749304]
+  
+  bug fixes:
+    - title: "Conversion pipeline: Workaround for bug in lxml that causes a massive mem leak on windows and OS X when the input document contains non ASCII CSS selectors."
+      tickets: [754555]
+
+    - title: "Conversion pipeline: Handle inline <style> tags that put all the actual CSS inside an XML comment."
+      tickets: [750063]
+
+    - title: "The 'Choose Library' button now shows its popup menu when you already have more than one library instead of the dialog to create a new library"
+      tickets: [754154]
+
+    - title: "Apply all content server settings when clicking the Start Server button in Preferences->Sharing over the net"
+      tickets: [753122]
+
+    - title: "Fix content server breaking if its restriction is set to a saved search that was deleted"
+      tickets: [751950]
+
+    - title: "Fix detection of PocketBook with 2.0.6 firmware on windows"
+      tickets: [750336]
+
+    - title: "ODT Input: Fix handling of the <text:s> element."
+      tickets: [749655]
+
+    - title: "MOBI Output: Don't use self closed tags"
+
+    - title: "Fix book details popup becoming too tall if there is a lot of metadata"
+
+    - title: "Fix new PDF engine crashing on PDF files with embedded fonts with null names"
+
+  improved recipes:
+    - Kommersant
+    - Perfil
+    - Times of India
+    - IHT
+    - Guardian
+
+  new recipes:
+    - title: "Al Ahram"
+      authors: Hassan Williamson
+
+    - title: "F-Secure and developpez.com"
+      authors: louhike
+
 - version: 0.7.53
  date: 2011-04-01

--- a/8
+++ b/8
@ -1,6 +1,9 @@
 calibre supports installation from source, only on Linux. 
-On Windows and OS X use the provided installers and use
-the facilities of the calibre-debug command to hack on the calibre source. 
+
+Note that you *do not* need to install from source to hack on
+the calibre source code. To get started with calibre development,
+use a normal calibre install and follow the instructions at
+http://calibre-ebook.com/user_manual/develop.html

 On Linux, there are two kinds of installation from source possible.
 Note that both kinds require lots of dependencies as well as a
@ -45,3 +48,4 @@ This type of install can be run with the command::
    sudo python setup.py develop

 Use the -h flag for help on the develop command.
+
--- a/2
+++ b/2
@ -7,7 +7,7 @@ reading. It is cross platform, running on Linux, Windows and OS X.
 For screenshots: https://calibre-ebook.com/demo

 For installation/usage instructions please see
-http://calibre-ebook.com
+http://calibre-ebook.com/user_manual

 For source code access:
 bzr branch lp:calibre
--- a/recipes/al_ahram.recipe
+++ b/recipes/al_ahram.recipe
@ -0,0 +1,62 @@
+# coding=utf-8
+__license__   = 'GPL v3'
+__copyright__ = '2011, Hassan Williamson <haz at hazrpg.co.uk>'
+'''
+ahram.org.eg
+'''
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class AlAhram(BasicNewsRecipe):
+    title                  = 'Al-Ahram'
+    __author__             = 'Hassan Williamson'
+    description            = 'News from Egypt in Arabic.'
+    oldest_article         = 7
+    max_articles_per_feed  = 100
+    no_stylesheets         = True
+    #delay                 = 1
+    use_embedded_content   = False
+    encoding               = 'utf8'
+    publisher              = 'Al-Ahram'
+    category               = 'News'
+    language               = 'ar'
+    publication_type       = 'newsportal'
+    extra_css              = ' body{ font-family: Verdana,Helvetica,Arial,sans-serif; direction: rtl; } .txtTitle{ font-weight: bold; } '
+
+
+    keep_only_tags    = [
+                            dict(name='div', attrs={'class':['bbcolright']})
+                        ]
+
+    remove_tags       = [
+                            dict(name='div', attrs={'class':['bbnav', 'bbsp']}),
+                            dict(name='div', attrs={'id':['AddThisButton']})
+                        ]
+
+    remove_attributes = [
+                            'width','height'
+                        ]
+
+    feeds             = [
+                            (u'الأولى', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=25'),
+                            (u'مصر', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=27'),
+                            (u'المحافظات', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=29'),
+                            (u'الوطن العربي', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=31'),
+                            (u'العالم', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=26'),
+                            (u'تقارير المراسلين', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=2'),
+                            (u'تحقيقات', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=3'),
+                            (u'قضايا واراء', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=4'),
+                            (u'اقتصاد', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=5'),
+                            (u'رياضة', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=6'),
+                            (u'حوادث', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=38'),
+                            (u'دنيا الثقافة', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=7'),
+                            (u'المراة والطفل', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=8'),
+                            (u'يوم جديد', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=9'),
+                            (u'الكتاب', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=10'),
+                            (u'الاعمدة', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=11'),
+                            (u'أراء حرة', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=59'),
+                            (u'ملفات الاهرام', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=12'),
+                            (u'بريد الاهرام', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=15'),
+                            (u'الاخيرة', 'http://www.ahram.org.eg/RssXml.aspx?CategoryID=16'),
+                        ]
+
+
--- a/recipes/big_oven.recipe
+++ b/recipes/big_oven.recipe
@ -36,29 +36,38 @@ class BigOven(BasicNewsRecipe):

    remove_attributes = ['style', 'font']

-    remove_tags     = [dict(name='div', attrs={'class':['ppy-caption']})
-                                  ,dict(name='div', attrs={'id':['float_corner']})
+    def get_article_url(self, article):
+        url = article.get('feedburner_origlink',article.get('link', None))
+        front, middle, end = url.partition('comhttp//www.bigoven.com')
+        url = front + 'com' + end
+        return url
+
+    keep_only_tags = [dict(name='div', attrs={'id':['nosidebar_main']})]
+
+    remove_tags_after = [dict(name='div', attrs={'class':['display-field']})]
+
+    remove_tags =  [dict(name='ul', attrs={'class':['tabs']})]
+
+    preprocess_regexps = [
+        (re.compile(r'Want detailed nutrition information?', re.DOTALL), lambda match: ''),
+        (re.compile('\(You could win \$100 in our ', re.DOTALL), lambda match: ''),
         ]

    def preprocess_html(self, soup):
-        for tag in soup.findAll(name='a', attrs={'class':['deflink']}):
-          tag.replaceWith(tag.string)
        for tag in soup.findAll(name='a', text=re.compile(r'.*View Metric.*', re.DOTALL)):
            tag.parent.parent.extract()
-        for tag in soup.findAll(name='a', text=re.compile(r'.*Add my own photo.*', re.DOTALL)):
-          tag.parent.parent.extract()
-        for tag in soup.findAll(name='div', attrs={'class':['container']}):
-          if tag.find(name='h1'):
-              continue
-          if tag.find(name='h2', text=re.compile(r'.*Ingredients.*', re.DOTALL)):
-              print 'tag found Ingred h2'
-              continue
-          if tag.find(name='h2', text=re.compile(r'Preparation.*', re.DOTALL)):
-              print 'tag found Prep h2'
-              continue
+        for tag in soup.findAll(text=re.compile(r'.*Try BigOven Pro for Free.*', re.DOTALL)):
            tag.extract()
+        for tag in soup.findAll(text=re.compile(r'.*Add my photo of this recipe.*', re.DOTALL)):
+            tag.parent.extract()
+        for tag in soup.findAll(name='a', text=re.compile(r'.*photo contest.*', re.DOTALL)):
+            tag.parent.extract()
+        for tag in soup.findAll(name='a', text='Remove ads'):
+            tag.parent.parent.extract()
+        for tag in soup.findAll(name='ol', attrs={'class':['recipe-tags']}):
+            tag.parent.extract()
        return soup

-    feeds = [(u'4 & 5 Star Rated Recipes', u'http://feeds.feedburner.com/Bigovencom-RecipeRaves?format=xml')]
-    
+    feeds = [(u'Recent Raves', u'http://www.bigoven.com/rss/recentraves'),
+                   (u'Recipe Of The Day', u'http://feeds.feedburner.com/bigovencom-RecipeOfTheDay')]

--- a/recipes/brand_eins.recipe
+++ b/recipes/brand_eins.recipe
@ -3,8 +3,7 @@

 __license__   = 'GPL v3'
 __copyright__ = '2010, Constantin Hofstetter <consti at consti.de>, Steffen Siebert <calibre at steffensiebert.de>'
-__version__   = '0.97'
-
+__version__   = '0.98' # 2011-04-10
 ''' http://brandeins.de - Wirtschaftsmagazin '''
 import re
 import string
@ -14,8 +13,8 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
 class BrandEins(BasicNewsRecipe):

  title = u'brand eins'
-  __author__ = 'Constantin Hofstetter'
-  description = u'Wirtschaftsmagazin'
+  __author__ = 'Constantin Hofstetter; Steffen Siebert'
+  description = u'Wirtschaftsmagazin: Gets the last full issue on default. Set a integer value for the username-field to get older issues: 1 -> the newest (but not complete) issue, 2 -> the last complete issue (default), 3 -> the issue before 2 etc.'
  publisher ='brandeins.de'
  category = 'politics, business, wirtschaft, Germany'
  use_embedded_content = False
--- a/recipes/developpez.recipe
+++ b/recipes/developpez.recipe
@ -0,0 +1,21 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1301849956(BasicNewsRecipe):
+    title          = u'Developpez.com'
+    description = u'Toutes les news du site Developpez.com'
+    publisher = u'Developpez.com'
+    timefmt = ' [%a, %d %b, %Y]'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    encoding = 'ISO-8859-1'
+    language = 'fr'
+    __author__ = 'louhike'
+    remove_javascript = True
+    keep_only_tags = [dict(name='div', attrs={'class':'content'})]
+
+    feeds = [(u'Tous les articles', u'http://www.developpez.com/index/rss')]
+
+    def get_cover_url(self):
+        return 'http://javascript.developpez.com/template/images/logo.gif'
+
--- a/recipes/dvhn.recipe
+++ b/recipes/dvhn.recipe
@ -0,0 +1,32 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1302341394(BasicNewsRecipe):
+    title          = u'DvhN'
+    oldest_article = 1
+    max_articles_per_feed = 200
+
+    __author__ = 'Reijndert'
+    no_stylesheets = True
+    cover_url = 'http://www.dvhn.nl/template/Dagblad_v2.0/gfx/logo_DvhN.gif'
+    language = 'nl'
+    country = 'NL'
+    version = 1
+    publisher = u'Dagblad van het Noorden'
+    category = u'Nieuws'
+    description = u'Nieuws uit Noord Nederland'
+
+
+    keep_only_tags = [dict(name='div', attrs={'id':'fullPicture'})
+                               ,dict(name='div', attrs={'id':'articleText'})
+                  ]
+
+    remove_tags = [
+                    dict(name=['object','link','iframe','base'])
+                    ,dict(name='span',attrs={'class':'copyright'})
+                ]
+
+    feeds          = [(u'Drenthe', u'http://www.dvhn.nl/nieuws/drenthe/index.jsp?service=rss'), (u'Groningen', u'http://www.dvhn.nl/nieuws/groningen/index.jsp?service=rss'), (u'Nederland', u'http://www.dvhn.nl/nieuws/nederland/index.jsp?service=rss'), (u'Wereld', u'http://www.dvhn.nl/nieuws/wereld/index.jsp?service=rss'), (u'Economie', u'http://www.dvhn.nl/nieuws/economie/index.jsp?service=rss'), (u'Sport', u'http://www.dvhn.nl/nieuws/sport/index.jsp?service=rss'), (u'Cultuur', u'http://www.dvhn.nl/nieuws/kunst/index.jsp?service=rss'), (u'24 Uur', u'http://www.dvhn.nl/nieuws/24uurdvhn/index.jsp?service=rss&selectiontype=last24hours')]
+
+    extra_css = '''
+                body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
+                '''
--- a/recipes/economist.recipe
+++ b/recipes/economist.recipe
@ -18,7 +18,8 @@ class Economist(BasicNewsRecipe):

    __author__ = "Kovid Goyal"
    INDEX = 'http://www.economist.com/printedition'
-    description = 'Global news and current affairs from a European perspective.'
+    description = ('Global news and current affairs from a European'
+            ' perspective. Best downloaded on Friday mornings (GMT)')

    oldest_article = 7.0
    cover_url = 'http://www.economist.com/images/covers/currentcoverus_large.jpg'
--- a/recipes/economist_free.recipe
+++ b/recipes/economist_free.recipe
@ -11,7 +11,8 @@ class Economist(BasicNewsRecipe):
    language = 'en'

    __author__ = "Kovid Goyal"
-    description = ('Global news and current affairs from a European perspective.'
+    description = ('Global news and current affairs from a European'
+            ' perspective. Best downloaded on Friday mornings (GMT).'
            ' Much slower than the print edition based version.')

    oldest_article = 7.0
--- a/recipes/f_secure.recipe
+++ b/recipes/f_secure.recipe
@ -0,0 +1,22 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1301860159(BasicNewsRecipe):
+    title          = u'F-Secure Weblog'
+    language = 'en'
+    __author__ = 'louhike'
+    description = u'All the news from the weblog of F-Secure'
+    publisher = u'F-Secure'
+    timefmt = ' [%a, %d %b, %Y]'
+    encoding = 'ISO-8859-1'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content   = False
+    language = 'en_EN'
+    remove_javascript = True
+    keep_only_tags = [dict(name='div', attrs={'class':'modSectionTd2'})]
+    remove_tags = [dict(name='a'),dict(name='hr')]
+
+    feeds          = [(u'Weblog', u'http://www.f-secure.com/weblog/weblog.rss')]
+    def get_cover_url(self):
+        return 'http://www.f-secure.com/weblog/archives/images/company_logo.png'
--- a/recipes/financial_times.recipe
+++ b/recipes/financial_times.recipe
@ -11,7 +11,8 @@ from calibre.web.feeds.news import BasicNewsRecipe
 class FinancialTimes(BasicNewsRecipe):
    title                 = u'Financial Times'
    __author__            = 'Darko Miletic and Sujata Raman'
-    description           = 'Financial world news'
+    description           = ('Financial world news. Available after 5AM '
+                                'GMT, daily.')
    oldest_article        = 2
    language = 'en'

--- a/recipes/hallo_assen.recipe
+++ b/recipes/hallo_assen.recipe
@ -0,0 +1,36 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1302341394(BasicNewsRecipe):
+    title          = u'Hallo Assen'
+    oldest_article = 180
+    max_articles_per_feed = 100
+
+    __author__ = 'Reijndert'
+    no_stylesheets = True
+    cover_url = 'http://www.halloassen.nl/multimedia/halloassen/archive/00002/HalloAssen_2518a.gif'
+    language = 'nl'
+    country = 'NL'
+    version = 1
+    category = u'Nieuws'
+    timefmt = ' %Y-%m-%d (%a)'
+
+
+
+    keep_only_tags = [dict(name='div', attrs={'class':'photoFrame'})
+                               ,dict(name='div', attrs={'class':'textContent'})
+                  ]
+
+    remove_tags = [
+                    dict(name='div',attrs={'id':'articleLinks'})
+                    ,dict(name='div',attrs={'class':'categories clearfix'})
+                    ,dict(name='div',attrs={'id':'rating'})
+                    ,dict(name='div',attrs={'id':'comments'})
+                ]
+
+    feeds          = [(u'Ons Nieuws', u'http://feeds.feedburner.com/halloassen/onsnieuws'), (u'Politie', u'http://www.halloassen.nl/rss/?c=37'), (u'Rechtbank', u'http://www.halloassen.nl/rss/?c=39'), (u'Justitie', u'http://www.halloassen.nl/rss/?c=36'), (u'Evenementen', u'http://www.halloassen.nl/rss/?c=34'), (u'Cultuur', u'http://www.halloassen.nl/rss/?c=32'), (u'Politiek', u'http://www.halloassen.nl/rss/?c=38'), (u'Economie', u'http://www.halloassen.nl/rss/?c=33')]
+
+
+    extra_css = '''
+                body {font-family: verdana, arial, helvetica, geneva, sans-serif;}
+                '''
+
--- a/recipes/irish_times.recipe
+++ b/recipes/irish_times.recipe
@ -18,7 +18,6 @@ class IrishTimes(BasicNewsRecipe):
    oldest_article = 1.0
    max_articles_per_feed  = 100
    no_stylesheets = True
-    simultaneous_downloads= 5

    r = re.compile('.*(?P<url>http:\/\/(www.irishtimes.com)|(rss.feedsportal.com\/c)\/.*\.html?).*')
    remove_tags    = [dict(name='div', attrs={'class':'footer'})]
@ -26,17 +25,17 @@ class IrishTimes(BasicNewsRecipe):

    feeds          = [
                      ('Frontpage', 'http://www.irishtimes.com/feeds/rss/newspaper/index.rss'),
-                      ('Ireland', 'http://www.irishtimes.com/feeds/rss/newspaper/ireland.rss'),
-                      ('World', 'http://www.irishtimes.com/feeds/rss/newspaper/world.rss'),
-                      ('Finance', 'http://www.irishtimes.com/feeds/rss/newspaper/finance.rss'),
-                      ('Features', 'http://www.irishtimes.com/feeds/rss/newspaper/features.rss'),
-                      ('Sport', 'http://www.irishtimes.com/feeds/rss/newspaper/sport.rss'),
-                      ('Opinion', 'http://www.irishtimes.com/feeds/rss/newspaper/opinion.rss'),
-                      ('Letters', 'http://www.irishtimes.com/feeds/rss/newspaper/letters.rss'),
+                      ('Ireland', 'http://rss.feedsportal.com/c/851/f/10845/index.rss'),
+                      ('World', 'http://rss.feedsportal.com/c/851/f/10846/index.rss'),
+                      ('Finance', 'http://rss.feedsportal.com/c/851/f/10847/index.rss'),
+                      ('Features', 'http://rss.feedsportal.com/c/851/f/10848/index.rss'),
+                      ('Sport', 'http://rss.feedsportal.com/c/851/f/10849/index.rss'),
+                      ('Opinion', 'http://rss.feedsportal.com/c/851/f/10850/index.rss'),
+                      ('Letters', 'http://rss.feedsportal.com/c/851/f/10851/index.rss'),
                      ('Magazine', 'http://www.irishtimes.com/feeds/rss/newspaper/magazine.rss'),
-                      ('Health', 'http://www.irishtimes.com/feeds/rss/newspaper/health.rss'),
-                      ('Education & Parenting', 'http://www.irishtimes.com/feeds/rss/newspaper/education.rss'),
-                      ('Motors', 'http://www.irishtimes.com/feeds/rss/newspaper/motors.rss'),
+                      ('Health', 'http://rss.feedsportal.com/c/851/f/10852/index.rss'),
+                      ('Education & Parenting', 'http://rss.feedsportal.com/c/851/f/10853/index.rss'),
+                      ('Motors', 'http://rss.feedsportal.com/c/851/f/10854/index.rss'),
                      ('An Teanga Bheo', 'http://www.irishtimes.com/feeds/rss/newspaper/anteangabheo.rss'),
                      ('Commercial Property', 'http://www.irishtimes.com/feeds/rss/newspaper/commercialproperty.rss'),
                      ('Science Today', 'http://www.irishtimes.com/feeds/rss/newspaper/sciencetoday.rss'),
@ -57,5 +56,3 @@ class IrishTimes(BasicNewsRecipe):

    def get_article_url(self, article):
        return article.link
-
-
--- a/recipes/kommersant.recipe
+++ b/recipes/kommersant.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.kommersant.ru
 '''
@ -20,7 +20,13 @@ class Kommersant_ru(BasicNewsRecipe):
    language              = 'ru'
    publication_type      = 'newspaper'
    masthead_url          = 'http://www.kommersant.ru/CorpPics/logo_daily_1.gif'
-    extra_css             = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Arial, sans1, sans-serif} span#ctl00_ContentPlaceHolderStyle_LabelSubTitle{margin-bottom: 1em; display: block} .author{margin-bottom: 1em; display: block} .paragraph{margin-bottom: 1em; display: block} .vvodka{font-weight: bold; margin-bottom: 1em} '
+    extra_css             = """ 
+	                          @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
+	                          body{font-family: Tahoma, Arial, Helvetica, sans1, sans-serif}
+							  .title{font-size: x-large; font-weight: bold; margin-bottom: 1em}
+							  .subtitle{font-size: large; margin-bottom: 1em}
+							  .document_vvodka{font-weight: bold; margin-bottom: 1em}
+							"""

    conversion_options = {
                          'comment'          : description
@ -29,14 +35,11 @@ class Kommersant_ru(BasicNewsRecipe):
                        , 'language'         : language
                        }

-    keep_only_tags = [
-                         dict(attrs={'id':'ctl00_ContentPlaceHolderStyle_PanelHeader'})
-                        ,dict(attrs={'class':['vvodka','paragraph','author']})
-                     ]
-    remove_tags        = [dict(name=['iframe','object','link','img','base'])]
+    keep_only_tags = [dict(attrs={'class':['document','document_vvodka','document_text','document_authors vblock']})]
+    remove_tags    = [dict(name=['iframe','object','link','img','base','meta'])]

    feeds       = [(u'Articles', u'http://feeds.kommersant.ru/RSS_Export/RU/daily.xml')]

    def print_version(self, url):	    
-        return url.replace('doc-rss.aspx','doc.aspx') + '&print=true'
+        return url.replace('/doc-rss/','/Doc/') + '/Print'
 		
--- a/recipes/novosti.recipe
+++ b/recipes/novosti.recipe
@ -1,6 +1,6 @@

 __license__   = 'GPL v3'
-__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 novosti.rs
 '''
@ -21,10 +21,12 @@ class Novosti(BasicNewsRecipe):
    encoding              = 'utf-8'
    language              = 'sr'
    publication_type      = 'newspaper'
+    masthead_url          = 'http://www.novosti.rs/images/basic/logo-print.png'
    extra_css             = """ @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
                                .article_description,body{font-family: Arial,Helvetica,sans1,sans-serif}
                                .author{font-size: small}
                                .articleLead{font-size: large; font-weight: bold}
+                                img{display: block; margin-bottom: 1em; margin-top: 1em}
                            """

    conversion_options = {
@ -32,23 +34,58 @@ class Novosti(BasicNewsRecipe):
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : language
+                        , 'pretty_print' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]

-    keep_only_tags     = [dict(attrs={'class':['articleTitle','author','articleLead','articleBody']})]
-    remove_tags        = [dict(name=['embed','object','iframe','base','link','meta'])]
-    feeds              = [(u'Vesti', u'http://www.novosti.rs/rss/rss-vesti')]
+    keep_only_tags     = [dict(attrs={'class':['articleTitle','articleInfo','articleLead','singlePhoto fl','articleBody']})]
+    remove_tags        = [
+                            dict(name=['embed','object','iframe','base','link','meta'])
+                           ,dict(name='a', attrs={'class':'loadComments topCommentsLink'})
+                         ]
+    remove_attributes  = ['lang','xmlns:fb']
+    
+    feeds              = [
+	                        (u'Politika'     , u'http://www.novosti.rs/rss/2-Sve%20vesti')
+	                       ,(u'Drustvo'      , u'http://www.novosti.rs/rss/1-Sve%20vesti')
+	                       ,(u'Ekonomija'    , u'http://www.novosti.rs/rss/3-Sve%20vesti')
+	                       ,(u'Hronika'      , u'http://www.novosti.rs/rss/4-Sve%20vesti')
+	                       ,(u'Dosije'       , u'http://www.novosti.rs/rss/5-Sve%20vesti')
+	                       ,(u'Reportaze'    , u'http://www.novosti.rs/rss/6-Sve%20vesti')
+	                       ,(u'Tehnologije'  , u'http://www.novosti.rs/rss/35-Sve%20vesti')
+	                       ,(u'Zanimljivosti', u'http://www.novosti.rs/rss/26-Sve%20vesti')
+	                       ,(u'Auto'         , u'http://www.novosti.rs/rss/50-Sve%20vesti')
+	                       ,(u'Sport'        , u'http://www.novosti.rs/rss/11|47|12|14|13-Sve%20vesti')
+	                       ,(u'Svet'         , u'http://www.novosti.rs/rss/7-Sve%20vesti')
+	                       ,(u'Region'       , u'http://www.novosti.rs/rss/8-Sve%20vesti')
+	                       ,(u'Dijaspora'    , u'http://www.novosti.rs/rss/9-Sve%20vesti')
+	                       ,(u'Spektakl'     , u'http://www.novosti.rs/rss/10-Sve%20vesti')
+	                       ,(u'Kultura'      , u'http://www.novosti.rs/rss/31-Sve%20vesti')
+	                       ,(u'Srbija'       , u'http://www.novosti.rs/rss/15-Sve%20vesti')
+	                       ,(u'Beograd'      , u'http://www.novosti.rs/rss/16-Sve%20vesti')
+	                       ,(u'Zivot+'       , u'http://www.novosti.rs/rss/24|33|34|25|20|18|32|19-Sve%20vesti')
+	                       ,(u'Turizam'      , u'http://www.novosti.rs/rss/36-Sve%20vesti')
+						 ]

    def preprocess_html(self, soup):
        for item in soup.findAll(style=True):
            del item['style']
-        for item in soup.findAll('span', attrs={'class':'author'}):
-            item.name='p'
+        for item in soup.findAll('a'):
+            limg = item.find('img')
+            if item.string is not None:
+               str = item.string
+               item.replaceWith(str)
+            else:
+               if limg:
+                  item.name = 'div'
+                  item.attrs = []
+               else:
+                   str = self.tag_to_string(item)
+                   item.replaceWith(str)
        for item in soup.findAll('img'):
            if not item.has_key('alt'):
               item['alt'] = 'image'
        return soup


-
--- a/recipes/nspm.recipe
+++ b/recipes/nspm.recipe
@ -1,12 +1,12 @@
 __license__   = 'GPL v3'
-__copyright__ = '2008-2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2008-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 nspm.rs
 '''

 import re
 from calibre.web.feeds.news import BasicNewsRecipe
-from calibre.ebooks.BeautifulSoup import NavigableString
+from calibre.ebooks.BeautifulSoup import NavigableString, Tag

 class Nspm(BasicNewsRecipe):
    title                 = 'Nova srpska politicka misao'
@ -21,7 +21,6 @@ class Nspm(BasicNewsRecipe):
    INDEX                 = 'http://www.nspm.rs/?alphabet=l'
    encoding              = 'utf-8'
    language              = 'sr'
-    delay                 = 2
    remove_empty_feeds    = True
    publication_type      = 'magazine'
    masthead_url          = 'http://www.nspm.rs/templates/jsn_epic_pro/images/logol.jpg'
@ -29,7 +28,7 @@ class Nspm(BasicNewsRecipe):
                                @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
                                body{font-family: "Times New Roman", serif1, serif}
                                .article_description{font-family: Arial, sans1, sans-serif}
-                                img{margin-top:0.5em; margin-bottom: 0.7em}
+                                img{margin-top:0.5em; margin-bottom: 0.7em; display: block}
                                .author{color: #990000; font-weight: bold}
                                .author,.createdate{font-size: 0.9em} """

@ -38,18 +37,12 @@ class Nspm(BasicNewsRecipe):
                        , 'tags'         : category
                        , 'publisher'    : publisher
                        , 'language'     : language
-                        , 'linearize_tables' : True
+                        , 'pretty_print' : True
                        }

    preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
-    keep_only_tags = [dict(attrs={'id':'jsn-mainbody'})]
-    remove_tags        = [
-                           dict(name=['link','object','embed','script','meta','base','iframe'])
-                          ,dict(attrs={'class':'buttonheading'})
-                         ]
-    remove_tags_before = dict(attrs={'class':'contentheading'})
-    remove_tags_after  = dict(attrs={'class':'article_separator'})
-    remove_attributes  = ['width','height']
+    remove_tags        = [dict(name=['link','script','meta','base','img'])]
+    remove_attributes  = ['width','height','lang','xmlns:fb','xmlns:og','vspace','hspace','type','start','size']

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -68,10 +61,56 @@ class Nspm(BasicNewsRecipe):
            ]

    def preprocess_html(self, soup):
-        for item in soup.body.findAll(style=True):
-            del item['style']
-        for item in soup.body.findAll('h1'):
-            nh = NavigableString(item.a.string)
-            item.a.extract()
-            item.insert(0,nh)
-        return self.adeify_images(soup)
+        '''
+        Rebuild the page body from the title link, the author span, the
+        creation date cell and the article text cell, then unwrap hyperlinks
+        and give every image an ``alt`` attribute.
+
+        The body is only rebuilt when the ``createdate`` cell is present;
+        any of the other pieces that cannot be found is simply left out.
+        Returns the modified soup.
+        '''
+        body = soup.body
+
+        cleanTitle = None
+        atitle = body.find('a', attrs={'class':'contentpagetitle'})
+        if atitle is not None:
+            cleanTitle = Tag(soup, 'h1', [('class','contentpagetitle')])
+            cleanTitle.append(NavigableString(self.tag_to_string(atitle)))
+
+        author = body.find('span', attrs={'class':'author'})
+        if author is not None:
+            author.extract()
+            author.name = 'div'
+
+        crdate = body.find('td', attrs={'class':'createdate'})
+        if crdate is not None:
+            cleanCrdate = Tag(soup, 'div', [('class','createdate')])
+            cleanCrdate.append(NavigableString(self.tag_to_string(crdate)))
+
+            # The article text is taken from the first <td> of the next
+            # sibling tag that has the same name as the date cell's parent.
+            # An empty placeholder <div> is used when no such tag exists.
+            artText = Tag(soup, 'div', [('class','text')])
+            textHolderp = crdate.parent
+            textHolder = textHolderp.nextSibling
+            while textHolder is not None and (
+                    not isinstance(textHolder, Tag) or
+                    textHolder.name != textHolderp.name):
+                textHolder = textHolder.nextSibling
+            if textHolder is not None and textHolder.td:
+                artText          = textHolder.td
+                artText.name     = 'div'
+                artText.attrs    = []
+                artText['class'] = 'text'
+                artText.extract()
+
+            body.contents = []
+
+            # Title and author are optional: skip whichever was not found
+            # instead of appending None (or an unbound name) to the body.
+            for piece in (cleanTitle, author, cleanCrdate, artText):
+                if piece is not None:
+                    body.append(piece)
+
+        # Unwrap hyperlinks: a link with a single string child is replaced by
+        # that string, a link containing an image becomes an attribute-less
+        # <div>, anything else is replaced by the result of tag_to_string().
+        for link in soup.findAll('a'):
+            inner_img = link.find('img')
+            if link.string is not None:
+                link.replaceWith(link.string)
+            elif inner_img:
+                link.name = 'div'
+                link.attrs = []
+            else:
+                link.replaceWith(self.tag_to_string(link))
+        for item in soup.findAll('img'):
+            if not item.has_key('alt'):
+                item['alt'] = 'image'
+        return soup
--- a/recipes/perfil.recipe
+++ b/recipes/perfil.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 perfil.com
 '''
@ -39,9 +39,9 @@ class Perfil(BasicNewsRecipe):
                      dict(name=['iframe','embed','object','base','meta','link'])
                     ,dict(name='a', attrs={'href':'#comentarios'})
                     ,dict(name='div', attrs={'class':'foto3'})
-                     ,dict(name='img', attrs={'alt':'ampliar'})
+                     ,dict(name='img', attrs={'alt':['ampliar','Ampliar']})
                    ]
-    keep_only_tags=[dict(attrs={'class':['bd468a','cuerpoSuperior']})]
+    keep_only_tags=[dict(attrs={'class':['articulo','cuerpoSuperior']})]
    remove_attributes=['onload','lang','width','height','border']

    feeds = [
--- a/recipes/sueddeutschezeitung.recipe
+++ b/recipes/sueddeutschezeitung.recipe
@ -1,4 +1,3 @@
-
 __license__   = 'GPL v3'
 __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 '''
@ -19,11 +18,11 @@ class SueddeutcheZeitung(BasicNewsRecipe):
    encoding               = 'cp1252'
    needs_subscription     = True
    remove_empty_feeds     = True
-    delay                  = 2
+    delay                  = 1
    PREFIX                 = 'http://www.sueddeutsche.de'
    INDEX                  = PREFIX + '/app/epaper/textversion/'
    use_embedded_content   = False
-    masthead_url           = 'http://pix.sueddeutsche.de/img/layout/header/logo.gif'
+    masthead_url           = 'http://pix.sueddeutsche.de/img/layout/header/SZ_solo288x31.gif'
    language               = 'de'
    publication_type       = 'newspaper'
    extra_css              = ' body{font-family: Arial,Helvetica,sans-serif} '
@ -36,7 +35,7 @@ class SueddeutcheZeitung(BasicNewsRecipe):
                        , 'linearize_tables' : True
                        }

-    remove_attributes = ['height','width']
+    remove_attributes = ['height','width','style']

    def get_browser(self):
        br = BasicNewsRecipe.get_browser()
@ -50,7 +49,7 @@ class SueddeutcheZeitung(BasicNewsRecipe):

    remove_tags        =[
                         dict(attrs={'class':'hidePrint'})
-                        ,dict(name=['link','object','embed','base','iframe'])
+                        ,dict(name=['link','object','embed','base','iframe','br'])
                        ]
    keep_only_tags     = [dict(attrs={'class':'artikelBox'})]
    remove_tags_before =  dict(attrs={'class':'artikelTitel'})
@ -68,6 +67,19 @@ class SueddeutcheZeitung(BasicNewsRecipe):
              ,(u'Sport'         , INDEX + 'Sport/'        )
              ,(u'Bayern'        , INDEX + 'Bayern/'       )
              ,(u'Muenchen'      , INDEX + 'M%FCnchen/'    )
+              ,(u'Muenchen City' , INDEX + 'M%FCnchen+City/' )
+              ,(u'Jetzt.de'         , INDEX + 'Jetzt.de/'        )
+              ,(u'Reise'         , INDEX + 'Reise/'        )
+              ,(u'SZ Extra'         , INDEX + 'SZ+Extra/'        )
+              ,(u'Wochenende'    , INDEX + 'SZ+am+Wochenende/' )
+              ,(u'Stellen-Markt'  , INDEX + 'Stellen-Markt/')
+              ,(u'Motormarkt'  , INDEX + 'Motormarkt/')
+              ,(u'Immobilien-Markt', INDEX + 'Immobilien-Markt/')
+              ,(u'Thema'         , INDEX + 'Thema/'        )
+              ,(u'Forum'         , INDEX + 'Forum/'        )
+              ,(u'Leute'         , INDEX + 'Leute/'        )
+              ,(u'Jugend'         , INDEX + 'Jugend/'        )
+              ,(u'Beilage'         , INDEX + 'Beilage/'        )
            ]

    def parse_index(self):
--- a/recipes/tabu.recipe
+++ b/recipes/tabu.recipe
@ -31,23 +31,22 @@ class TabuRo(BasicNewsRecipe):
                         }

    keep_only_tags = [
-           dict(name='div', attrs={'id':'Article'}),
+                        dict(name='h2', attrs={'class':'articol_titlu'}),
+                        dict(name='div', attrs={'class':'poza_articol_featured'}),
+                        dict(name='div', attrs={'class':'articol_text'})
                     ]

    remove_tags = [
-            dict(name='div', attrs={'id':['advertisementArticle']}),
-                        dict(name='div', attrs={'class':'voting_number'}),
-            dict(name='div', attrs={'id':'number_votes'}),
-            dict(name='div', attrs={'id':'rating_one'}),
-            dict(name='div', attrs={'class':'float: right;'})
+            dict(name='div', attrs={'class':'asemanatoare'})
                           ]

    remove_tags_after = [
                      dict(name='div', attrs={'id':'comments'}),
+                      dict(name='div', attrs={'class':'asemanatoare'})
                        ]

    feeds          = [
-        (u'Feeds', u'http://www.tabu.ro/rss_all.xml')
+                        (u'Feeds', u'http://www.tabu.ro/feed/')
                     ]

    def preprocess_html(self, soup):
--- a/recipes/toi.recipe
+++ b/recipes/toi.recipe
@ -1,3 +1,4 @@
+import re
 from calibre.web.feeds.news import BasicNewsRecipe

 class TimesOfIndia(BasicNewsRecipe):
@ -8,10 +9,10 @@ class TimesOfIndia(BasicNewsRecipe):
    max_articles_per_feed = 25

    no_stylesheets = True
-    keep_only_tags = [dict(attrs={'class':'maintable12'})]
+    keep_only_tags = [{'class':['maintable12', 'prttabl']}]
    remove_tags = [
            dict(style=lambda x: x and 'float' in x),
-            dict(attrs={'class':'prvnxtbg'}),
+            {'class':['prvnxtbg', 'footbdrin', 'bcclftr']},
    ]

    feeds          = [
@ -38,8 +39,28 @@ class TimesOfIndia(BasicNewsRecipe):
 ('Most Read',
 'http://timesofindia.indiatimes.com/rssfeedmostread.cms')
 ]
-    def print_version(self, url):
-        return url + '?prtpage=1'
+
+    def get_article_url(self, article):
+        '''
+        Return the URL to download for ``article``.
+
+        The link reported by ``BasicNewsRecipe.get_article_url`` is rewritten
+        to the ``articleshow/<id>.cms?prtpage=1`` page whenever a numeric
+        article id can be extracted from it; otherwise the (possibly decoded)
+        link is returned. A missing link is passed through unchanged.
+        '''
+        print_page = 'http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1'
+        url = BasicNewsRecipe.get_article_url(self, article)
+        if not url:
+            # Nothing to rewrite; avoid a TypeError on the substring tests
+            return url
+        if '/0Ltimesofindia' in url:
+            # Encoded link (presumably produced by a feed proxy -- TODO
+            # confirm): the real address follows '/0L', with '0B', '0N', '0C'
+            # and '0E' standing in for '.', '.com', '/' and '-'.
+            url = url.partition('/0L')[-1]
+            url = url.replace('0B', '.').replace('0N', '.com').replace('0C',
+                    '/').replace('0E', '-')
+            # Drop everything after the last '/' of the decoded address
+            url = 'http://' + url.rpartition('/')[0]
+            match = re.search(r'/([0-9a-zA-Z]+?)\.cms', url)
+            if match is not None:
+                # Keep only the digits of the matched path component
+                num = re.sub(r'[^0-9]', '', match.group(1))
+                return print_page % num
+        else:
+            cms = re.search(r'/(\d+)\.cms', url)
+            if cms is not None:
+                return print_page % cms.group(1)
+
+        return url
+

    def preprocess_html(self, soup):
        return soup
--- a/recipes/weblogs_sl.recipe
+++ b/recipes/weblogs_sl.recipe
@ -3,7 +3,7 @@ __license__     = 'GPL v3'
 __copyright__   = '4 February 2011, desUBIKado'
 __author__      = 'desUBIKado'
 __version__     = 'v0.05'
-__date__        = '9, February 2011'
+__date__        = '13, April 2011'
 '''
 http://www.weblogssl.com/
 '''
@ -19,7 +19,7 @@ class weblogssl(BasicNewsRecipe):
    category       = 'Gadgets, Tech news, Product reviews, mobiles, science, cinema, entertainment, culture, tv, food, recipes, life style, motor, F1, sports, economy'
    language       = 'es'
    timefmt        = '[%a, %d %b, %Y]'
-    oldest_article = 1.5
+    oldest_article = 1
    max_articles_per_feed = 100
    encoding       = 'utf-8'
    use_embedded_content  = False
@ -28,50 +28,52 @@ class weblogssl(BasicNewsRecipe):
    no_stylesheets = True

    # Si no se quiere recuperar todos los blogs se puede suprimir la descarga del que se desee poniendo
-    # un caracter # por delante, es decir,  # (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
-    # haría que no se descargase Applesfera. OJO: El último feed no debe llevar la coma al final
+    # un caracter # por delante, es decir,  # ,(u'Applesfera', u'http://feeds.weblogssl.com/applesfera')
+    # haría que no se descargase Applesfera.

    feeds              = [
-                          (u'Xataka', u'http://feeds.weblogssl.com/xataka2'),
-                          (u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil'),
-                          (u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid'),
-                          (u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto'),
-                          (u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon'),
-                          (u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia'),
-                          (u'Genbeta', u'http://feeds.weblogssl.com/genbeta'),
-                          (u'Applesfera', u'http://feeds.weblogssl.com/applesfera'),
-                          (u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra'),
-                          (u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred'),
-                          (u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine'),
-                          (u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2'),
-                          (u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica'),
-                          (u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero'),
-                          (u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco'),
-                          (u'Pop rosa', u'http://feeds.weblogssl.com/poprosa'),
-                          (u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom'),
-                          (u'Fandemia', u'http://feeds.weblogssl.com/fandemia'),
-                          (u'Noctamina', u'http://feeds.weblogssl.com/noctamina'),
-                          (u'Tendencias', u'http://feeds.weblogssl.com/trendencias'),
-                          (u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas'),
-                          (u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar'),
-                          (u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion'),
-                          (u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera'),
-                          (u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia'),
-                          (u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica'),
-                          (u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg'),
-                          (u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora'),
-                          (u'Mensencia', u'http://feeds.weblogssl.com/mensencia'),
-                          (u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas'),
-                          (u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion'),
-                          (u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1'),
-                          (u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto'),
-                          (u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol'),
-                          (u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites'),
-                          (u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar'),
-                          (u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2'),
-                          (u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos'),
-                          (u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme'),
-                          (u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario')
+                          (u'Xataka', u'http://feeds.weblogssl.com/xataka2')
+                          ,(u'Xataka M\xf3vil', u'http://feeds.weblogssl.com/xatakamovil')
+                          ,(u'Xataka Android', u'http://feeds.weblogssl.com/xatakandroid')
+                          ,(u'Xataka Foto', u'http://feeds.weblogssl.com/xatakafoto')
+                          ,(u'Xataka ON', u'http://feeds.weblogssl.com/xatakaon')
+                          ,(u'Xataka Ciencia', u'http://feeds.weblogssl.com/xatakaciencia')
+                          ,(u'Genbeta', u'http://feeds.weblogssl.com/genbeta')
+                          ,(u'Genbeta Dev', u'http://feeds.weblogssl.com/genbetadev')
+                          ,(u'Applesfera', u'http://feeds.weblogssl.com/applesfera')
+                          ,(u'Vida Extra', u'http://feeds.weblogssl.com/vidaextra')
+                          ,(u'Naci\xf3n Red', u'http://feeds.weblogssl.com/nacionred')
+                          ,(u'Blog de Cine', u'http://feeds.weblogssl.com/blogdecine')
+                          ,(u'Vaya tele', u'http://feeds.weblogssl.com/vayatele2')
+                          ,(u'Hipers\xf3nica', u'http://feeds.weblogssl.com/hipersonica')
+                          ,(u'Diario del viajero', u'http://feeds.weblogssl.com/diariodelviajero')
+                          ,(u'Papel en blanco', u'http://feeds.weblogssl.com/papelenblanco')
+                          ,(u'Pop rosa', u'http://feeds.weblogssl.com/poprosa')
+                          ,(u'Zona FandoM', u'http://feeds.weblogssl.com/zonafandom')
+                          ,(u'Fandemia', u'http://feeds.weblogssl.com/fandemia')
+                          ,(u'Noctamina', u'http://feeds.weblogssl.com/noctamina')
+                          ,(u'Tendencias', u'http://feeds.weblogssl.com/trendencias')
+                          ,(u'Beb\xe9s y m\xe1s', u'http://feeds.weblogssl.com/bebesymas')
+                          ,(u'Directo al paladar', u'http://feeds.weblogssl.com/directoalpaladar')
+                          ,(u'Compradicci\xf3n', u'http://feeds.weblogssl.com/compradiccion')
+                          ,(u'Decoesfera', u'http://feeds.weblogssl.com/decoesfera')
+                          ,(u'Embelezzia', u'http://feeds.weblogssl.com/embelezzia')
+                          ,(u'Vit\xf3nica', u'http://feeds.weblogssl.com/vitonica')
+                          ,(u'Ambiente G', u'http://feeds.weblogssl.com/ambienteg')
+                          ,(u'Arrebatadora', u'http://feeds.weblogssl.com/arrebatadora')
+                          ,(u'Mensencia', u'http://feeds.weblogssl.com/mensencia')
+                          ,(u'Peques y m\xe1s', u'http://feeds.weblogssl.com/pequesymas')
+                          ,(u'Motorpasi\xf3n', u'http://feeds.weblogssl.com/motorpasion')
+                          ,(u'Motorpasi\xf3n F1', u'http://feeds.weblogssl.com/motorpasionf1')
+                          ,(u'Motorpasi\xf3n Moto', u'http://feeds.weblogssl.com/motorpasionmoto')
+                          ,(u'Motorpasi\xf3n Futuro', u'http://feeds.weblogssl.com/motorpasionfuturo')
+                          ,(u'Notas de futbol', u'http://feeds.weblogssl.com/notasdefutbol')
+                          ,(u'Fuera de l\xedmites', u'http://feeds.weblogssl.com/fueradelimites')
+                          ,(u'Salir a ganar', u'http://feeds.weblogssl.com/saliraganar')
+                          ,(u'El blog salm\xf3n', u'http://feeds.weblogssl.com/elblogsalmon2')
+                          ,(u'Pymes y aut\xf3nomos', u'http://feeds.weblogssl.com/pymesyautonomos')
+                          ,(u'Tecnolog\xeda Pyme', u'http://feeds.weblogssl.com/tecnologiapyme')
+                          ,(u'Ahorro diario', u'http://feeds.weblogssl.com/ahorrodiario')
                         ]


@ -102,3 +104,4 @@ class weblogssl(BasicNewsRecipe):
               video_yt['src'] = fuente3 + '/0.jpg'

        return soup
+
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@ -81,6 +81,11 @@ class WallStreetJournal(BasicNewsRecipe):
           feeds.append((title, articles))
        return feeds

+    def abs_wsj_url(self, href):
+        '''
+        Return ``href`` unchanged when it starts with ``http``; otherwise
+        return it prefixed with ``http://online.wsj.com``.
+        '''
+        if href.startswith('http'):
+            return href
+        return 'http://online.wsj.com' + href
+
    def parse_index(self):
        soup = self.wsj_get_index()

@ -99,14 +104,14 @@ class WallStreetJournal(BasicNewsRecipe):
            pageone = a['href'].endswith('pageone')
            if pageone:
               title = 'Front Section'
-               url = 'http://online.wsj.com' + a['href']
+               url = self.abs_wsj_url(a['href'])
               feeds = self.wsj_add_feed(feeds,title,url)
               title = "What's News"
               url = url.replace('pageone','whatsnews')
               feeds = self.wsj_add_feed(feeds,title,url)
            else:
               title = self.tag_to_string(a)
-               url = 'http://online.wsj.com' + a['href']
+               url = self.abs_wsj_url(a['href'])
               feeds = self.wsj_add_feed(feeds,title,url)
        return feeds

@ -163,7 +168,7 @@ class WallStreetJournal(BasicNewsRecipe):
                title = self.tag_to_string(a).strip() + ' [%s]'%meta
            else:
                title = self.tag_to_string(a).strip()
-            url = 'http://online.wsj.com'+a['href']
+            url = self.abs_wsj_url(a['href'])
            desc = ''
            for p in container.findAll('p'):
                desc = self.tag_to_string(p)
--- a/resources/default_tweaks.py
+++ b/resources/default_tweaks.py
@ -88,13 +88,6 @@ categories_collapsed_rating_template = r'{first.avg_rating:4.2f:ifempty(0)} - {l
 categories_collapsed_popularity_template = r'{first.count:d} - {last.count:d}'


-#: Set boolean custom columns to be tristate
-# Set whether boolean custom columns are two- or three-valued.
-#  Two-values for true booleans
-#  three-values for yes/no/unknown
-# Set to 'yes' for three-values, 'no' for two-values
-bool_custom_columns_are_tristate = 'yes'
-
 #: Specify columns to sort the booklist by on startup
 # Provide a set of columns to be sorted on when calibre starts
 #  The argument is None if saved sort history is to be used
--- a/resources/images/connect_share_on.png
+++ b/resources/images/connect_share_on.png
--- a/resources/images/highlight_only_off.png
+++ b/resources/images/highlight_only_off.png
--- a/resources/images/highlight_only_on.png
+++ b/resources/images/highlight_only_on.png
--- a/resources/images/store.png
+++ b/resources/images/store.png
--- a/setup/pygettext.py
+++ b/setup/pygettext.py
@ -170,8 +170,8 @@ from setup import __appname__, __version__ as version
 # there.
 pot_header = '''\
 # Translation template file..
-# Copyright (C) 2007 Kovid Goyal
-# Kovid Goyal <kovid@kovidgoyal.net>, 2007.
+# Copyright (C) %(year)s Kovid Goyal
+# Kovid Goyal <kovid@kovidgoyal.net>, %(year)s.
 #
 msgid ""
 msgstr ""
@ -185,7 +185,7 @@ msgstr ""
 "Content-Transfer-Encoding: 8bit\\n"
 "Generated-By: pygettext.py %%(version)s\\n"

-'''%dict(appname=__appname__, version=version)
+'''%dict(appname=__appname__, version=version, year=time.strftime('%Y'))


 def usage(code, msg=''):
--- a/setup/translations.py
+++ b/setup/translations.py
@ -26,6 +26,38 @@ class POT(Command):
                    ans.append(os.path.abspath(os.path.join(root, name)))
        return ans

+    def get_tweaks_docs(self):
+        '''
+        Build gettext template entries for the documentation comments in
+        ``resources/default_tweaks.py``.
+
+        Every line starting with ``#:`` yields one entry for its heading
+        text and, when comment lines follow it directly, a second entry
+        holding those lines joined with newlines. Each entry consists of a
+        ``#: path:lineno`` reference, a ``msgid`` with the escaped text, an
+        empty ``msgstr`` and a blank line.
+
+        Returns the entries joined into a single string.
+        '''
+        path = self.a(self.j(self.SRC, '..', 'resources', 'default_tweaks.py'))
+        with open(path, 'rb') as f:
+            raw = f.read().decode('utf-8')
+        msgs = []
+        lines = list(raw.splitlines())
+        for i, line in enumerate(lines):
+            if line.startswith('#:'):
+                # NOTE(review): i is the 0-based index from enumerate(), so
+                # the recorded references are one less than the 1-based line
+                # numbers of the file -- confirm this is intended.
+                msgs.append((i, line[2:].strip()))
+                block = []
+                # Collect the comment lines directly below the heading. The
+                # scan is bounded by the end of the file so that a trailing
+                # comment block does not raise IndexError.
+                j = i + 1
+                while j < len(lines) and lines[j].startswith('#'):
+                    block.append(lines[j][1:].strip())
+                    j += 1
+                if block:
+                    msgs.append((i+1, '\n'.join(block)))
+
+        ans = []
+        for lineno, msg in msgs:
+            ans.append('#: %s:%d'%(path, lineno))
+            # Escape backslashes first so the escapes added below survive
+            slash = unichr(92)
+            msg = msg.replace(slash, slash*2).replace('"', r'\"').replace('\n',
+                    r'\n').replace('\r', r'\r').replace('\t', r'\t')
+            ans.append('msgid "%s"'%msg)
+            ans.append('msgstr ""')
+            ans.append('')
+
+        return '\n'.join(ans)
+

    def run(self, opts):
        files = self.source_files()
@ -35,10 +67,10 @@ class POT(Command):
        atexit.register(shutil.rmtree, tempdir)
        pygettext(buf, ['-k', '__', '-p', tempdir]+files)
        src = buf.getvalue()
+        src += '\n\n' + self.get_tweaks_docs()
        pot = os.path.join(self.PATH, __appname__+'.pot')
-        f = open(pot, 'wb')
+        with open(pot, 'wb') as f:
            f.write(src)
-        f.close()
        self.info('Translations template:', os.path.abspath(pot))
        return pot

--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -5,7 +5,9 @@ __docformat__ = 'restructuredtext en'

 import uuid, sys, os, re, logging, time, random, \
       __builtin__, warnings, multiprocessing
+from contextlib import closing
 from urllib import getproxies
+from urllib2 import unquote as urllib2_unquote
 __builtin__.__dict__['dynamic_property'] = lambda(func): func(None)
 from htmlentitydefs import name2codepoint
 from math import floor
@ -217,8 +219,19 @@ def filename_to_utf8(name):
    return name.decode(codec, 'replace').encode('utf8')

 def extract(path, dir):
-    ext = os.path.splitext(path)[1][1:].lower()
    extractor = None
+    # First use the file header to identify its type
+    with open(path, 'rb') as f:
+        id_ = f.read(3)
+    if id_ == b'Rar':
+        from calibre.libunrar import extract as rarextract
+        extractor = rarextract
+    elif id_.startswith(b'PK'):
+        from calibre.libunzip import extract as zipextract
+        extractor = zipextract
+    if extractor is None:
+        # Fallback to file extension
+        ext = os.path.splitext(path)[1][1:].lower()
        if ext in ['zip', 'cbz', 'epub', 'oebzip']:
            from calibre.libunzip import extract as zipextract
            extractor = zipextract
@ -279,21 +292,24 @@ def get_parsed_proxy(typ='http', debug=True):
                    prints('Using http proxy', str(ans))
                return ans

+USER_AGENT = 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101210 Gentoo Firefox/3.6.13'
+USER_AGENT_MOBILE = 'Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016'
+
 def random_user_agent():
+    '''
+    Return one entry, picked at random, from a fixed list of browser
+    User-Agent strings.
+    '''
    choices = [
-        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)',
+        'Mozilla/5.0 (Windows NT 5.2; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
+        'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19',
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
-        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en; rv:1.8.1.14) Gecko/20080409 Camino/1.6 (like Firefox/2.0.0.14)',
-        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.0.1) Gecko/20060118 Camino/1.0b2+',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.78 Safari/532.5',
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
    ]
+    #return choices[-1]
    return choices[random.randint(0, len(choices)-1)]

-
 def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
    '''
    Create a mechanize browser for web scraping. The browser handles cookies,
@ -307,8 +323,7 @@ def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
    opener.set_handle_refresh(True, max_time=max_time, honor_time=honor_time)
    opener.set_handle_robots(False)
    if user_agent is None:
-        user_agent = ' Mozilla/5.0 (Windows; U; Windows CE 5.1; rv:1.8.1a3) Gecko/20060610 Minimo/0.016' if mobile_browser else \
-                          'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101210 Gentoo Firefox/3.6.13'
+        user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
    opener.addheaders = [('User-agent', user_agent)]
    http_proxy = get_proxies().get('http', None)
    if http_proxy:
@ -525,7 +540,49 @@ def as_unicode(obj, enc=preferred_encoding):
                obj = repr(obj)
    return force_unicode(obj, enc=enc)

+def url_slash_cleaner(url):
+    '''
+    Collapse every run of two or more slashes in ``url`` into a single
+    slash, except for a run that directly follows a colon.
+    '''
+    redundant_slashes = r'(?<!:)/{2,}'
+    return re.sub(redundant_slashes, '/', url)

+def get_download_filename(url, cookie_file=None):
+    '''
+    Get a local filename for a URL using the content disposition header.
+
+    :param url: The URL to open.
+    :param cookie_file: Optional path to a cookie file; it is loaded with
+        mechanize's ``MozillaCookieJar`` and used for the request.
+    :return: The unquoted filename from the ``Content-disposition`` header.
+        When the header yields no name, or the request fails (the traceback
+        is printed), the last path component of the final URL (or of ``url``
+        if the request never completed) is returned instead.
+    '''
+    filename = ''
+    # Fallback source for the name. Kept separate from the response object,
+    # which does not exist when opening the URL fails.
+    final_url = url
+
+    br = browser()
+    if cookie_file:
+        from mechanize import MozillaCookieJar
+        cj = MozillaCookieJar()
+        cj.load(cookie_file)
+        br.set_cookiejar(cj)
+
+    try:
+        with closing(br.open(url)) as r:
+            final_url = r.geturl()
+            disposition = r.info().get('Content-disposition', '')
+            for p in disposition.split(';'):
+                if 'filename' in p:
+                    if '*=' in disposition:
+                        # Extended form: keep what follows the last quote
+                        # after the last '*='
+                        parts = disposition.split('*=')[-1]
+                        filename = parts.split('\'')[-1]
+                    else:
+                        filename = disposition.split('=')[-1]
+                    # Slicing (rather than indexing) keeps an empty value
+                    # from raising IndexError
+                    if filename[:1] in ('\'', '"'):
+                        filename = filename[1:]
+                    if filename[-1:] in ('\'', '"'):
+                        filename = filename[:-1]
+                    filename = urllib2_unquote(filename)
+                    break
+    except Exception:
+        # Best effort: report the problem and fall back to the URL below
+        import traceback
+        traceback.print_exc()
+
+    if not filename:
+        filename = final_url.split('/')[-1]
+
+    return filename

 def human_readable(size):
    """ Convert a size in bytes into a human readable form """
--- a/src/calibre/constants.py
+++ b/src/calibre/constants.py
@ -2,7 +2,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
 __docformat__ = 'restructuredtext en'
 __appname__   = 'calibre'
-__version__   = '0.7.53'
+__version__   = '0.7.55'
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"

 import re, importlib
--- a/src/calibre/customize/init.py
+++ b/src/calibre/customize/init.py
@ -602,3 +602,35 @@ class PreferencesPlugin(Plugin): # {{{

 # }}}

+class StoreBase(Plugin): # {{{
+
+    # Thin wrapper around the real store plugin, which is imported only when
+    # load_actual_plugin() is called. The configuration hooks below delegate
+    # to that object once it has been loaded.
+    supported_platforms = ['windows', 'osx', 'linux']
+    author         = 'John Schember'
+    type = _('Store')
+
+    # 'module:ClassName' of the real plugin (split on ':' when loading);
+    # presumably overridden by subclasses -- TODO confirm
+    actual_plugin = None
+
+    def load_actual_plugin(self, gui):
+        '''
+        Import the class named by ``actual_plugin``, instantiate it with
+        ``(gui, self.name)``, remember the instance as
+        ``actual_plugin_object`` and return it.
+        '''
+        module_name, class_name = self.actual_plugin.split(':')
+        plugin_class = getattr(importlib.import_module(module_name), class_name)
+        self.actual_plugin_object = plugin_class(gui, self.name)
+        return self.actual_plugin_object
+
+    def customization_help(self, gui=False):
+        # Delegates to the loaded plugin; not implemented until one is loaded
+        loaded = getattr(self, 'actual_plugin_object', None)
+        if loaded is None:
+            raise NotImplementedError()
+        return loaded.customization_help(gui)
+
+    def config_widget(self):
+        # Delegates to the loaded plugin; not implemented until one is loaded
+        loaded = getattr(self, 'actual_plugin_object', None)
+        if loaded is None:
+            raise NotImplementedError()
+        return loaded.config_widget()
+
+    def save_settings(self, config_widget):
+        # Delegates to the loaded plugin; not implemented until one is loaded
+        loaded = getattr(self, 'actual_plugin_object', None)
+        if loaded is None:
+            raise NotImplementedError()
+        return loaded.save_settings(config_widget)
+
+# }}}
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -5,11 +5,12 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
 import textwrap, os, glob, functools, re
 from calibre import guess_type
 from calibre.customize import FileTypePlugin, MetadataReaderPlugin, \
-    MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase
+    MetadataWriterPlugin, PreferencesPlugin, InterfaceActionBase, StoreBase
 from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.oeb.base import OEB_IMAGES
+from calibre.utils.config import test_eight_code

 # To archive plugins {{{
 class HTML2ZIP(FileTypePlugin):
@ -166,6 +167,14 @@ class ComicMetadataReader(MetadataReaderPlugin):
    description = _('Extract cover from comic files')

    def get_metadata(self, stream, ftype):
+        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
+            pos = stream.tell()
+            id_ = stream.read(3)
+            stream.seek(pos)
+            if id_ == b'Rar':
+                ftype = 'cbr'
+            elif id_.startswith(b'PK'):
+                ftype = 'cbz'
        if ftype == 'cbr':
            from calibre.libunrar import extract_first_alphabetically as extract_first
            extract_first
@ -604,6 +613,24 @@ from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK

+from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
+from calibre.ebooks.epub.fix.unmanifested import Unmanifested
+from calibre.ebooks.epub.fix.epubcheck import Epubcheck
+
+plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
+        Epubcheck, ]
+
+if test_eight_code:
+# New metadata download plugins {{{
+    from calibre.ebooks.metadata.sources.google import GoogleBooks
+    from calibre.ebooks.metadata.sources.amazon import Amazon
+    from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
+    from calibre.ebooks.metadata.sources.isbndb import ISBNDB
+
+    plugins += [GoogleBooks, Amazon, OpenLibrary, ISBNDB]
+
+# }}}
+else:
    from calibre.ebooks.metadata.fetch import KentDistrictLibrary, Amazon
    from calibre.ebooks.metadata.douban import DoubanBooks
    from calibre.ebooks.metadata.isbndb import ISBNDB
@ -613,14 +640,11 @@ from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
    from calibre.ebooks.metadata.fictionwise import Fictionwise
    from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
            AmazonCovers, DoubanCovers #, LibrarythingCovers
-from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
-from calibre.ebooks.epub.fix.unmanifested import Unmanifested
-from calibre.ebooks.epub.fix.epubcheck import Epubcheck

-plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon, #AmazonSocial,
-        KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
-        Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers, #LibrarythingCovers,
-        NiceBooksCovers]
+    plugins += [GoogleBooks, ISBNDB, Amazon,
+        OpenLibraryCovers, AmazonCovers, DoubanCovers,
+        NiceBooksCovers, KentDistrictLibrary, DoubanBooks, NiceBooks]
+
 plugins += [
    ComicInput,
    EPUBInput,
@ -833,6 +857,11 @@ class ActionNextMatch(InterfaceActionBase):
    name = 'Next Match'
    actual_plugin = 'calibre.gui2.actions.next_match:NextMatchAction'

+# Declaration of the 'Store' interface action; the implementing class is
+# named by ``actual_plugin``. It is added to ``plugins`` separately from the
+# main action list, and only when test_eight_code is truthy.
+class ActionStore(InterfaceActionBase):
+    name = 'Store'
+    author = 'John Schember'
+    actual_plugin = 'calibre.gui2.actions.store:StoreAction'
+
 plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
        ActionConvert, ActionDelete, ActionEditMetadata, ActionView,
        ActionFetchNews, ActionSaveToDisk, ActionShowBookDetails,
@ -841,6 +870,9 @@ plugins += [ActionAdd, ActionFetchAnnotations, ActionGenerateCatalog,
        ActionAddToLibrary, ActionEditCollections, ActionChooseLibrary,
        ActionCopyToLibrary, ActionTweakEpub, ActionNextMatch]

+if test_eight_code:
+    plugins += [ActionStore]
+
 # }}}

 # Preferences Plugins {{{
@ -1018,6 +1050,17 @@ class Server(PreferencesPlugin):
            'give you access to your calibre library from anywhere, '
            'on any device, over the internet')

+# Preferences entry for metadata download, placed in the 'Sharing' category.
+# It is appended to ``plugins`` only when test_eight_code is truthy.
+class MetadataSources(PreferencesPlugin):
+    name = 'Metadata download'
+    icon = I('metadata.png')
+    gui_name = _('Metadata download')
+    category = 'Sharing'
+    gui_category = _('Sharing')
+    category_order = 4
+    name_order = 3
+    # Dotted module path of the configuration widget.
+    config_widget = 'calibre.gui2.preferences.metadata_sources'
+    description = _('Control how calibre downloads ebook metadata from the net')
+
 class Plugins(PreferencesPlugin):
    name = 'Plugins'
    icon = I('plugins.png')
@ -1056,13 +1099,86 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
        CommonOptions, OutputOptions, Adding, Saving, Sending, Plugboard,
        Email, Server, Plugins, Tweaks, Misc, TemplateFunctions]

-#}}}
-
-# New metadata download plugins {{{
-from calibre.ebooks.metadata.sources.google import GoogleBooks
-from calibre.ebooks.metadata.sources.amazon import Amazon
-from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
-
-plugins += [GoogleBooks, Amazon, OpenLibrary]
+if test_eight_code:
+    plugins.append(MetadataSources)
+
+#}}}
+
+# Store plugins {{{
+# Each class below only declares a store: ``name`` is handed to the
+# implementation's constructor, ``description`` is a translated blurb, and
+# ``actual_plugin`` is the 'module.path:ClassName' string that
+# StoreBase.load_actual_plugin() splits on ':' and imports on demand.
+class StoreAmazonKindleStore(StoreBase):
+    name = 'Amazon Kindle'
+    description = _('Kindle books from Amazon')
+    actual_plugin = 'calibre.gui2.store.amazon_plugin:AmazonKindleStore'
+
+class StoreBaenWebScriptionStore(StoreBase):
+    name = 'Baen WebScription'
+    description = _('Ebooks for readers.')
+    actual_plugin = 'calibre.gui2.store.baen_webscription_plugin:BaenWebScriptionStore'
+
+class StoreBNStore(StoreBase):
+    name = 'Barnes and Noble'
+    description = _('Books, Textbooks, eBooks, Toys, Games and More.')
+    actual_plugin = 'calibre.gui2.store.bn_plugin:BNStore'
+
+class StoreBeWriteStore(StoreBase):
+    name = 'BeWrite Books'
+    description = _('Publishers of fine books.')
+    actual_plugin = 'calibre.gui2.store.bewrite_plugin:BeWriteStore'
+
+class StoreDieselEbooksStore(StoreBase):
+    name = 'Diesel eBooks'
+    description = _('World Famous eBook Store.')
+    actual_plugin = 'calibre.gui2.store.diesel_ebooks_plugin:DieselEbooksStore'
+
+class StoreEbookscomStore(StoreBase):
+    name = 'eBooks.com'
+    description = _('The digital bookstore.')
+    actual_plugin = 'calibre.gui2.store.ebooks_com_plugin:EbookscomStore'
+
+# Declaration of the eHarlequin store; the implementation is imported on
+# demand from the 'module.path:ClassName' string in ``actual_plugin``.
+class StoreEHarlequinStore(StoreBase):
+    name = 'eHarlequin'
+    description = _('entertain, enrich, inspire.')
+    actual_plugin = 'calibre.gui2.store.eharlequin_plugin:EHarlequinStore'
+
+# The class was originally spelt 'StoreEHarlequinStoretore'. Keep that name
+# as an alias so the plugins list and any other importer keep working.
+StoreEHarlequinStoretore = StoreEHarlequinStore
+
+# Store declarations: ``actual_plugin`` is the 'module.path:ClassName'
+# string that StoreBase.load_actual_plugin() imports on demand.
+class StoreFeedbooksStore(StoreBase):
+    name = 'Feedbooks'
+    description = _('Read anywhere.')
+    actual_plugin = 'calibre.gui2.store.feedbooks_plugin:FeedbooksStore'
+
+class StoreGutenbergStore(StoreBase):
+    name = 'Project Gutenberg'
+    description = _('The first producer of free ebooks.')
+    actual_plugin = 'calibre.gui2.store.gutenberg_plugin:GutenbergStore'
+
+class StoreKoboStore(StoreBase):
+    name = 'Kobo'
+    description = _('eReading: anytime. anyplace.')
+    actual_plugin = 'calibre.gui2.store.kobo_plugin:KoboStore'
+
+class StoreManyBooksStore(StoreBase):
+    name = 'ManyBooks'
+    description = _('The best ebooks at the best price: free!')
+    actual_plugin = 'calibre.gui2.store.manybooks_plugin:ManyBooksStore'
+
+class StoreMobileReadStore(StoreBase):
+    name = 'MobileRead'
+    description = _('Ebooks handcrafted with the utmost care')
+    actual_plugin = 'calibre.gui2.store.mobileread_plugin:MobileReadStore'
+
+class StoreOpenLibraryStore(StoreBase):
+    name = 'Open Library'
+    description = _('One web page for every book.')
+    actual_plugin = 'calibre.gui2.store.open_library_plugin:OpenLibraryStore'
+
+class StoreSmashwordsStore(StoreBase):
+    name = 'Smashwords'
+    description = _('Your ebook. Your way.')
+    actual_plugin = 'calibre.gui2.store.smashwords_plugin:SmashwordsStore'
+
+# Register every store declaration, unconditionally (this statement is not
+# guarded by test_eight_code).
+plugins += [StoreAmazonKindleStore, StoreBaenWebScriptionStore, StoreBNStore,
+    StoreBeWriteStore, StoreDieselEbooksStore, StoreEbookscomStore,
+    StoreEHarlequinStoretore,
+    StoreFeedbooksStore, StoreGutenbergStore, StoreKoboStore, StoreManyBooksStore,
+    StoreMobileReadStore, StoreOpenLibraryStore, StoreSmashwordsStore]

 # }}}
--- a/src/calibre/customize/profiles.py
+++ b/src/calibre/customize/profiles.py
@ -344,6 +344,7 @@ class iPadOutput(OutputProfile):
                border-spacing:1px;
                margin-left: 5%;
                margin-right: 5%;
+                page-break-inside:avoid;
                width: 90%;
                -webkit-border-radius:4px;
                }
--- a/src/calibre/customize/ui.py
+++ b/src/calibre/customize/ui.py
@ -7,7 +7,8 @@ import os, shutil, traceback, functools, sys
 from calibre.customize import (CatalogPlugin, FileTypePlugin, PluginNotFound,
                              MetadataReaderPlugin, MetadataWriterPlugin,
                              InterfaceActionBase as InterfaceAction,
-                              PreferencesPlugin, platform, InvalidPlugin)
+                              PreferencesPlugin, platform, InvalidPlugin,
+                              StoreBase as Store)
 from calibre.customize.conversion import InputFormatPlugin, OutputFormatPlugin
 from calibre.customize.zipplugin import loader
 from calibre.customize.profiles import InputProfile, OutputProfile
@ -75,6 +76,17 @@ def enable_plugin(plugin_or_name):
    ep.add(x)
    config['enabled_plugins'] = ep

+def restore_plugin_state_to_default(plugin_or_name):
+    '''
+    Remove a plugin from both the disabled and the enabled plugin lists
+    held in ``config``.
+
+    :param plugin_or_name: a plugin object (its ``name`` attribute is used)
+                           or the plugin name itself.
+    '''
+    plugin_name = getattr(plugin_or_name, 'name', plugin_or_name)
+    # The disabled list is handled first, then the enabled list. Each value
+    # is assigned back to config whether or not anything was removed.
+    for key in ('disabled_plugins', 'enabled_plugins'):
+        names = config[key]
+        if plugin_name in names:
+            names.remove(plugin_name)
+        config[key] = names
+
 default_disabled_plugins = set([
    'Douban Books', 'Douban.com covers', 'Nicebooks', 'Nicebooks covers',
    'Fictionwise', 'Kent District Library'
@ -233,6 +245,17 @@ def preferences_plugins():
                yield plugin
 # }}}

+# Store Plugins # {{{
+
+def store_plugins():
+    '''
+    Yield every initialized Store plugin that is not disabled, after
+    setting its ``site_customization`` from the saved plugin customization
+    (empty string when nothing is saved under the plugin's name).
+    '''
+    saved = config['plugin_customization']
+    for candidate in _initialized_plugins:
+        if not isinstance(candidate, Store):
+            continue
+        if is_disabled(candidate):
+            continue
+        candidate.site_customization = saved.get(candidate.name, '')
+        yield candidate
+# }}}
+
 # Metadata read/write {{{
 _metadata_readers = {}
 _metadata_writers = {}
@ -453,12 +476,15 @@ def epub_fixers():
 # Metadata sources2 {{{
 def metadata_plugins(capabilities):
    capabilities = frozenset(capabilities)
-    for plugin in _initialized_plugins:
-        if isinstance(plugin, Source) and \
-                plugin.capabilities.intersection(capabilities) and \
+    for plugin in all_metadata_plugins():
+        if plugin.capabilities.intersection(capabilities) and \
                not is_disabled(plugin):
            yield plugin

+def all_metadata_plugins():
+    '''
+    Yield every initialized plugin that is a Source instance, whether or
+    not it is disabled.
+    '''
+    for candidate in _initialized_plugins:
+        if not isinstance(candidate, Source):
+            continue
+        yield candidate
 # }}}

 # Initialize plugins {{{
--- a/src/calibre/debug.py
+++ b/src/calibre/debug.py
@ -51,6 +51,8 @@ Run an embedded python interpreter.
            'with sqlite3 works.')
    parser.add_option('-p', '--py-console', help='Run python console',
            default=False, action='store_true')
+    parser.add_option('-m', '--inspect-mobi',
+            help='Inspect the MOBI file at the specified path', default=None)

    return parser

@ -227,6 +229,9 @@ def main(args=sys.argv):
        if len(args) > 1 and os.access(args[-1], os.R_OK):
            sql_dump = args[-1]
        reinit_db(opts.reinitialize_db, sql_dump=sql_dump)
+    elif opts.inspect_mobi is not None:
+        from calibre.ebooks.mobi.debug import inspect_mobi
+        inspect_mobi(opts.inspect_mobi)
    else:
        from calibre import ipython
        ipython()
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -36,7 +36,9 @@ class ANDROID(USBMS):
            # Motorola
            0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
                       0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
-                       0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216] },
+                       0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216],
+                       0x7086 : [0x0226], 0x70a8: [0x9999],
+                     },

            # Sony Ericsson
            0xfce : { 0xd12e : [0x0100]},
@ -52,6 +54,9 @@ class ANDROID(USBMS):
                       0x6877 : [0x0400],
                     },

+            # Viewsonic
+            0x0489 : { 0xc001 : [0x0226], 0xc004 : [0x0226], },
+
            # Acer
            0x502 : { 0x3203 : [0x0100]},

@ -94,14 +99,16 @@ class ANDROID(USBMS):

    VENDOR_NAME      = ['HTC', 'MOTOROLA', 'GOOGLE_', 'ANDROID', 'ACER',
            'GT-I5700', 'SAMSUNG', 'DELL', 'LINUX', 'GOOGLE', 'ARCHOS',
-            'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA']
+            'TELECHIP', 'HUAWEI', 'T-MOBILE', 'SEMC', 'LGE', 'NVIDIA',
+            'GENERIC-']
    WINDOWS_MAIN_MEM = ['ANDROID_PHONE', 'A855', 'A853', 'INC.NEXUS_ONE',
            '__UMS_COMPOSITE', '_MB200', 'MASS_STORAGE', '_-_CARD', 'SGH-I897',
            'GT-I9000', 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID',
            'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
            'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
            'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
-            '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2']
+            '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
+            'MB860', 'MULTI-CARD', 'MID7015A']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7']
--- a/src/calibre/devices/apple/driver.py
+++ b/src/calibre/devices/apple/driver.py
@ -349,7 +349,7 @@ class ITUNES(DriverBase):
                                        break
                            break
                if self.report_progress is not None:
-                    self.report_progress(j+1/task_count, _('Updating device metadata listing...'))
+                    self.report_progress((j+1)/task_count, _('Updating device metadata listing...'))

            if self.report_progress is not None:
                self.report_progress(1.0, _('Updating device metadata listing...'))
@ -428,7 +428,7 @@ class ITUNES(DriverBase):
                         }

                        if self.report_progress is not None:
-                            self.report_progress(i+1/book_count, _('%d of %d') % (i+1, book_count))
+                            self.report_progress((i+1)/book_count, _('%d of %d') % (i+1, book_count))
                    self._purge_orphans(library_books, cached_books)

                elif iswindows:
@ -466,7 +466,7 @@ class ITUNES(DriverBase):
                             }

                            if self.report_progress is not None:
-                                self.report_progress(i+1/book_count,
+                                self.report_progress((i+1)/book_count,
                                        _('%d of %d') % (i+1, book_count))
                        self._purge_orphans(library_books, cached_books)

@ -916,6 +916,8 @@ class ITUNES(DriverBase):
        """
        if DEBUG:
            self.log.info("ITUNES.reset()")
+        if report_progress:
+            self.set_progress_reporter(report_progress)

    def set_progress_reporter(self, report_progress):
        '''
@ -924,6 +926,9 @@ class ITUNES(DriverBase):
                                If it is called with -1 that means that the
                                task does not have any progress information
        '''
+        if DEBUG:
+            self.log.info("ITUNES.set_progress_reporter()")
+
        self.report_progress = report_progress

    def set_plugboards(self, plugboards, pb_func):
@ -1041,7 +1046,7 @@ class ITUNES(DriverBase):

                # Report progress
                if self.report_progress is not None:
-                    self.report_progress(i+1/file_count, _('%d of %d') % (i+1, file_count))
+                    self.report_progress((i+1)/file_count, _('%d of %d') % (i+1, file_count))

        elif iswindows:
            try:
@ -1081,7 +1086,7 @@ class ITUNES(DriverBase):

                    # Report progress
                    if self.report_progress is not None:
-                        self.report_progress(i+1/file_count, _('%d of %d') % (i+1, file_count))
+                        self.report_progress((i+1)/file_count, _('%d of %d') % (i+1, file_count))
            finally:
                pythoncom.CoUninitialize()

@ -3065,7 +3070,7 @@ class ITUNES_ASYNC(ITUNES):
                     }

                    if self.report_progress is not None:
-                        self.report_progress(i+1/book_count, _('%d of %d') % (i+1, book_count))
+                        self.report_progress((i+1)/book_count, _('%d of %d') % (i+1, book_count))

            elif iswindows:
                try:
@ -3104,7 +3109,7 @@ class ITUNES_ASYNC(ITUNES):
                         }

                        if self.report_progress is not None:
-                            self.report_progress(i+1/book_count,
+                            self.report_progress((i+1)/book_count,
                                    _('%d of %d') % (i+1, book_count))

                finally:
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@ -244,7 +244,8 @@ class POCKETBOOK602(USBMS):
    BCD         = [0x0324]

    VENDOR_NAME = ''
-    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902', 'PB903']
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902',
+            'PB903', 'PB']

 class POCKETBOOK701(USBMS):

--- a/src/calibre/devices/edge/driver.py
+++ b/src/calibre/devices/edge/driver.py
@ -26,9 +26,9 @@ class EDGE(USBMS):
    PRODUCT_ID  = [0x0c02]
    BCD         = [0x0223]

-    VENDOR_NAME = 'ANDROID'
-    WINDOWS_MAIN_MEM = '__FILE-STOR_GADG'
-    WINDOWS_CARD_A_MEM = '__FILE-STOR_GADG'
+    VENDOR_NAME = ['ANDROID', 'LINUX']
+    WINDOWS_MAIN_MEM = ['__FILE-STOR_GADG', 'FILE-CD_GADGET']
+    WINDOWS_CARD_A_MEM = ['__FILE-STOR_GADG', 'FILE-CD_GADGET']

    MAIN_MEMORY_VOLUME_LABEL  = 'Edge Main Memory'
    STORAGE_CARD_VOLUME_LABEL = 'Edge Storage Card'
--- a/src/calibre/devices/misc.py
+++ b/src/calibre/devices/misc.py
@ -244,7 +244,7 @@ class EEEREADER(USBMS):
    FORMATS     = ['epub', 'fb2', 'txt', 'pdf']

    VENDOR_ID   = [0x0b05]
-    PRODUCT_ID  = [0x178f]
+    PRODUCT_ID  = [0x178f, 0x17a1]
    BCD         = [0x0319]

    EBOOK_DIR_MAIN = EBOOK_DIR_CARD_A = 'Book'
--- a/src/calibre/devices/usbms/books.py
+++ b/src/calibre/devices/usbms/books.py
@ -203,6 +203,8 @@ class CollectionsBookList(BookList):
                    val = [orig_val]
                elif fm['datatype'] == 'text' and fm['is_multiple']:
                    val = orig_val
+                elif fm['datatype'] == 'composite' and fm['is_multiple']:
+                    val = [v.strip() for v in val.split(fm['is_multiple'])]
                else:
                    val = [val]

--- a/src/calibre/ebooks/init.py
+++ b/src/calibre/ebooks/init.py
@ -26,7 +26,7 @@ class ParserError(ValueError):
    pass

 BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm',
-                   'html', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
+                   'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'pdr', 'prc', 'mobi', 'azw', 'doc',
                   'epub', 'fb2', 'djvu', 'lrx', 'cbr', 'cbz', 'cbc', 'oebzip',
                   'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb']

--- a/src/calibre/ebooks/chm/input.py
+++ b/src/calibre/ebooks/chm/input.py
@ -51,6 +51,7 @@ class CHMInput(InputFormatPlugin):
            mainpath = os.path.join(tdir, mainname)

            metadata = get_metadata_from_reader(self._chm_reader)
+            self._chm_reader.CloseCHM()

            odi = options.debug_pipeline
            options.debug_pipeline = None
--- a/src/calibre/ebooks/epub/input.py
+++ b/src/calibre/ebooks/epub/input.py
@ -175,18 +175,18 @@ class EPUBInput(InputFormatPlugin):
                raise ValueError(
                    'EPUB files with DTBook markup are not supported')

+        not_for_spine = set()
+        for y in opf.itermanifest():
+            id_ = y.get('id', None)
+            if id_ and y.get('media-type', None) in \
+                ('application/vnd.adobe-page-template+xml',):
+                    not_for_spine.add(id_)
+
        for x in list(opf.iterspine()):
            ref = x.get('idref', None)
-            if ref is None:
+            if ref is None or ref in not_for_spine:
                x.getparent().remove(x)
                continue
-            for y in opf.itermanifest():
-                if y.get('id', None) == ref and y.get('media-type', None) in \
-                    ('application/vnd.adobe-page-template+xml',):
-                        p = x.getparent()
-                        if p is not None:
-                            p.remove(x)
-                        break

        with open('content.opf', 'wb') as nopf:
            nopf.write(opf.render())
--- a/src/calibre/ebooks/fb2/output.py
+++ b/src/calibre/ebooks/fb2/output.py
@ -155,7 +155,7 @@ class FB2Output(OutputFormatPlugin):
        OptionRecommendation(name='fb2_genre',
            recommended_value='antique', level=OptionRecommendation.LOW,
            choices=FB2_GENRES,
-            help=_('Genre for the book. Choices: %s\n\n See: ' % FB2_GENRES) + 'http://www.fictionbook.org/index.php/Eng:FictionBook_2.1_genres ' \
+            help=(_('Genre for the book. Choices: %s\n\n See: ') % FB2_GENRES) + 'http://www.fictionbook.org/index.php/Eng:FictionBook_2.1_genres ' \
                + _('for a complete list with descriptions.')),
    ])

--- a/src/calibre/ebooks/htmlz/input.py
+++ b/src/calibre/ebooks/htmlz/input.py
@ -10,6 +10,7 @@ import os

 from calibre import walk
 from calibre.customize.conversion import InputFormatPlugin
+from calibre.ebooks.chardet import xml_to_unicode
 from calibre.utils.zipfile import ZipFile

 class HTMLZInput(InputFormatPlugin):
@ -34,6 +35,13 @@ class HTMLZInput(InputFormatPlugin):
                    html = tf.read()
                    break
        
+        # Encoding
+        if options.input_encoding:
+            ienc = options.input_encoding
+        else:
+            ienc = xml_to_unicode(html[:4096])[-1]
+        html = html.decode(ienc, 'replace')
+        
        # Run the HTML through the html processing plugin.
        from calibre.customize.ui import plugin_for_input_format
        html_input = plugin_for_input_format('html')
--- a/src/calibre/ebooks/htmlz/oeb2html.py
+++ b/src/calibre/ebooks/htmlz/oeb2html.py
@ -12,10 +12,13 @@ Transform OEB content into a single (more or less) HTML file.

 import os

-from urlparse import urlparse
+from functools import partial
+from lxml import html
+from urlparse import urldefrag

 from calibre import prepare_string_for_xml
-from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace,\
+    OEB_IMAGES, XLINK, rewrite_links
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.utils.logging import default_log

@ -40,6 +43,8 @@ class OEB2HTML(object):
        self.opts = opts
        self.links = {}
        self.images = {}
+        self.base_hrefs = [item.href for item in oeb_book.spine]
+        self.map_resources(oeb_book)

        return self.mlize_spine(oeb_book)

@ -47,6 +52,8 @@ class OEB2HTML(object):
        output = [u'<html><body><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /></head>']
        for item in oeb_book.spine:
            self.log.debug('Converting %s to HTML...' % item.href)
+            self.rewrite_ids(item.data, item)
+            rewrite_links(item.data, partial(self.rewrite_link, page=item))
            stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
            output.append('\n\n')
@ -56,45 +63,61 @@ class OEB2HTML(object):
    def dump_text(self, elem, stylizer, page):
        raise NotImplementedError

-    def get_link_id(self, href, aid):
-        aid = '%s#%s' % (href, aid)
-        if aid not in self.links:
-            self.links[aid] = 'calibre_link-%s' % len(self.links.keys())
-        return self.links[aid]
-
-    def rewrite_links(self, tag, attribs, page):
-        # Rewrite ids.
-        if 'id' in attribs:
-            attribs['id'] = self.get_link_id(page.href, attribs['id'])
-        # Rewrite links.
-        if tag == 'a':
-            href = attribs['href']
-            href = page.abshref(href)
-            if self.url_is_relative(href):
-                if '#' not in href:
-                    href += '#'
+    def get_link_id(self, href, id=''):
+        if id:
+            href += '#%s' % id
        if href not in self.links:
-                    self.links[href] = 'calibre_link-%s' % len(self.links.keys())
-                href = '#%s' % self.links[href]
-            attribs['href'] = href
-        return attribs
+            self.links[href] = '#calibre_link-%s' % len(self.links.keys())
+        return self.links[href]

-    def rewrite_images(self, tag, attribs, page):
-        if tag == 'img':
-            src = attribs.get('src', None)
-            if src:
-                src = page.abshref(src)
-                if src not in self.images:
-                    ext = os.path.splitext(src)[1]
+    def map_resources(self, oeb_book):
+        for item in oeb_book.manifest:
+            if item.media_type in OEB_IMAGES:
+                if item.href not in self.images:
+                    ext = os.path.splitext(item.href)[1]
                    fname = '%s%s' % (len(self.images), ext)
                    fname = fname.zfill(10)
-                    self.images[src] = fname
-                attribs['src'] = 'images/%s' % self.images[src]
-        return attribs
+                    self.images[item.href] = fname
+            if item in oeb_book.spine:
+                self.get_link_id(item.href)
+                root = item.data.find(XHTML('body'))
+                link_attrs = set(html.defs.link_attrs)
+                link_attrs.add(XLINK('href'))
+                for el in root.iter():
+                    attribs = el.attrib
+                    try:
+                        if not isinstance(el.tag, basestring):
+                            continue
+                    except:
+                        continue
+                    for attr in attribs:
+                        if attr in link_attrs:
+                            href = item.abshref(attribs[attr])
+                            href, id = urldefrag(href)
+                            if href in self.base_hrefs:
+                                self.get_link_id(href, id)

-    def url_is_relative(self, url):
-        o = urlparse(url)
-        return False if o.scheme else True
+    def rewrite_link(self, url, page=None):
+        '''
+        Translate one link found in ``page`` for the single-file output.
+
+        The url is resolved with ``page.abshref``. A known image maps to its
+        ``images/<name>`` path, a known link target maps to its recorded
+        anchor, and anything else (or a call without a page) is returned
+        unchanged.
+        '''
+        if not page:
+            return url
+        target = page.abshref(url)
+        # Images take precedence over link anchors.
+        if target in self.images:
+            return 'images/%s' % self.images[target]
+        return self.links.get(target, url)
+
+    def rewrite_ids(self, root, page):
+        '''
+        Replace element ids under ``root`` with the anchors from
+        get_link_id(). The body element always receives the anchor of the
+        page itself; any other element that has an id receives the anchor
+        for that id within the page.
+        '''
+        for node in root.iter():
+            try:
+                node_tag = node.tag
+            except UnicodeDecodeError:
+                continue
+            if node_tag == XHTML('body'):
+                # get_link_id() returns '#calibre_link-N'; drop the '#'.
+                anchor = self.get_link_id(page.href)
+                node.attrib['id'] = anchor[1:]
+            elif 'id' in node.attrib:
+                anchor = self.get_link_id(page.href, node.attrib['id'])
+                node.attrib['id'] = anchor[1:]

    def get_css(self, oeb_book):
        css = u''
@ -131,9 +154,9 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
        tags = []
        tag = barename(elem.tag)
        attribs = elem.attrib
+
        if tag == 'body':
            tag = 'div'
-            attribs['id'] = self.get_link_id(page.href, '')
        tags.append(tag)

        # Ignore anything that is set to not be displayed.
@ -147,9 +170,6 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
        if 'style' in attribs:
            del attribs['style']

-        attribs = self.rewrite_links(tag, attribs, page)
-        attribs = self.rewrite_images(tag, attribs, page)
-
        # Turn the rest of the attributes into a string we can write with the tag.
        at = ''
        for k, v in attribs.items():
@ -222,7 +242,6 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
        style_a = '%s' % style
        if tag == 'body':
            tag = 'div'
-            attribs['id'] = self.get_link_id(page.href, '')
            if not style['page-break-before'] == 'always':
                style_a = 'page-break-before: always;' + ' ' if style_a else '' + style_a
        tags.append(tag)
@ -233,9 +252,6 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
        if 'style' in attribs:
            del attribs['style']

-        attribs = self.rewrite_links(tag, attribs, page)
-        attribs = self.rewrite_images(tag, attribs, page)
-
        # Turn the rest of the attributes into a string we can write with the tag.
        at = ''
        for k, v in attribs.items():
@ -280,6 +296,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
        output = []
        for item in oeb_book.spine:
            self.log.debug('Converting %s to HTML...' % item.href)
+            self.rewrite_ids(item.data, item)
+            rewrite_links(item.data, partial(self.rewrite_link, page=item))
            stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
            output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
            output.append('\n\n')
@ -307,23 +325,18 @@ class OEB2HTMLClassCSSizer(OEB2HTML):

        # Setup our variables.
        text = ['']
-        #style = stylizer.style(elem)
        tags = []
        tag = barename(elem.tag)
        attribs = elem.attrib

        if tag == 'body':
            tag = 'div'
-            attribs['id'] = self.get_link_id(page.href, '')
        tags.append(tag)

        # Remove attributes we won't want.
        if 'style' in attribs:
            del attribs['style']

-        attribs = self.rewrite_links(tag, attribs, page)
-        attribs = self.rewrite_images(tag, attribs, page)
-
        # Turn the rest of the attributes into a string we can write with the tag.
        at = ''
        for k, v in attribs.items():
--- a/src/calibre/ebooks/htmlz/output.py
+++ b/src/calibre/ebooks/htmlz/output.py
@ -12,7 +12,7 @@ from lxml import etree

 from calibre.customize.conversion import OutputFormatPlugin, \
    OptionRecommendation
-from calibre.ebooks.oeb.base import OEB_IMAGES
+from calibre.ebooks.oeb.base import OEB_IMAGES, SVG_MIME
 from calibre.ptempfile import TemporaryDirectory
 from calibre.utils.zipfile import ZipFile

@ -71,9 +71,13 @@ class HTMLZOutput(OutputFormatPlugin):
                    os.makedirs(os.path.join(tdir, 'images'))
                for item in oeb_book.manifest:
                    if item.media_type in OEB_IMAGES and item.href in images:
+                        if item.media_type == SVG_MIME:
+                            data = unicode(etree.tostring(item.data, encoding=unicode)) 
+                        else:
+                            data = item.data
                        fname = os.path.join(tdir, 'images', images[item.href])
                        with open(fname, 'wb') as img:
-                            img.write(item.data)
+                            img.write(data)

            # Metadata
            with open(os.path.join(tdir, 'metadata.opf'), 'wb') as mdataf:
--- a/src/calibre/ebooks/lrf/input.py
+++ b/src/calibre/ebooks/lrf/input.py
@ -6,8 +6,8 @@ __license__   = 'GPL v3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import os, textwrap, sys
-from copy import deepcopy
+import os, textwrap, sys, operator
+from copy import deepcopy, copy

 from lxml import etree

@ -149,9 +149,65 @@ class TextBlock(etree.XSLTExtension):
        self.root = root
        self.parent = root
        self.add_text_to = (self.parent, 'text')
+        self.fix_deep_nesting(node)
        for child in node:
            self.process_child(child)

+    def fix_deep_nesting(self, node):
+        '''
+        Flatten pathologically deep chains of nested ``Span`` elements below
+        ``node``, modifying the tree in place.
+
+        Depth is counted from the root of the tree (an element without a
+        parent has depth 1). If no ``Span`` below ``node`` reaches a depth of
+        500 nothing is changed.
+
+        Otherwise every ``Span`` with a depth of at least 3 is visited,
+        deepest first. The span and all of its following siblings are
+        removed from their parent and re-inserted immediately after the
+        parent, i.e. hoisted up by one level. The tail text of the parent
+        moves to the last hoisted element and, if the parent is itself a
+        ``Span``, its attributes are copied onto the hoisted ``Span``
+        children (attributes already present on a child win).
+        '''
+        deepest = 1
+
+        def depth(node):
+            # Number of elements on the path from the root down to node
+            parent = node.getparent()
+            ans = 1
+            while parent is not None:
+                ans += 1
+                parent = parent.getparent()
+            return ans
+
+        for span in node.xpath('descendant::Span'):
+            d = depth(span)
+            if d > deepest:
+                deepest = d
+                if d > 500:
+                    # Deep enough to need flattening, no need to scan further
+                    break
+
+        if deepest < 500:
+            return
+
+        self.log.warn('Found deeply nested spans. Flattening.')
+
+        spans = [(depth(span), span) for span in node.xpath('descendant::Span')]
+        spans.sort(key=operator.itemgetter(0), reverse=True)
+
+        # Use a distinct loop variable so the depth() helper is not shadowed
+        for span_depth, span in spans:
+            if span_depth < 3:
+                continue
+            p = span.getparent()
+            gp = p.getparent()
+            if gp is None:
+                # The recorded depth is stale: this span was already hoisted
+                # together with an earlier sibling and now sits directly
+                # under the root, so there is nowhere left to move it.
+                continue
+            idx = p.index(span)
+            pidx = gp.index(p)
+            children = list(p)[idx:]
+            # The hoisted elements end up after p, so the text that followed
+            # p must now follow the last hoisted element instead.
+            t = children[-1].tail
+            t = t if t else ''
+            children[-1].tail = t + (p.tail if p.tail else '')
+            p.tail = ''
+            pattrib = dict(**p.attrib) if p.tag == 'Span' else {}
+            for child in children:
+                p.remove(child)
+                if pattrib and child.tag == "Span":
+                    # Inherit the parent's attributes, child values win
+                    attrib = copy(pattrib)
+                    attrib.update(child.attrib)
+                    child.attrib.update(attrib)
+
+            # Inserting in reverse at a fixed index preserves document order
+            for child in reversed(children):
+                gp.insert(pidx+1, child)
+
    def add_text(self, text):
        if text:
            if getattr(self.add_text_to[0], self.add_text_to[1]) is None:
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@ -125,7 +125,10 @@ class Metadata(object):
        _data = object.__getattribute__(self, '_data')
        if field in TOP_LEVEL_IDENTIFIERS:
            field, val = self._clean_identifier(field, val)
-            _data['identifiers'].update({field: val})
+            identifiers = _data['identifiers']
+            identifiers.pop(field, None)
+            if val:
+                identifiers[field] = val
        elif field == 'identifiers':
            if not val:
                val = copy.copy(NULL_VALUES.get('identifiers', None))
@ -224,8 +227,7 @@ class Metadata(object):
        identifiers = object.__getattribute__(self,
            '_data')['identifiers']

-        if not val and typ in identifiers:
-            identifiers.pop(typ)
+        identifiers.pop(typ, None)
        if val:
            identifiers[typ] = val

@ -481,7 +483,7 @@ class Metadata(object):
                        self_tags = self.get(x, [])
                        self.set_user_metadata(x, meta) # get... did the deepcopy
                        other_tags = other.get(x, [])
-                        if meta['is_multiple']:
+                        if meta['datatype'] == 'text' and meta['is_multiple']:
                            # Case-insensitive but case preserving merging
                            lotags = [t.lower() for t in other_tags]
                            lstags = [t.lower() for t in self_tags]
@ -647,7 +649,7 @@ class Metadata(object):
            fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
        if self.series:
            fmt('Series', self.series + ' #%s'%self.format_series_index())
-        if self.language:
+        if not self.is_null('language'):
            fmt('Language', self.language)
        if self.rating is not None:
            fmt('Rating', self.rating)
--- a/src/calibre/ebooks/metadata/extz.py
+++ b/src/calibre/ebooks/metadata/extz.py
@ -8,12 +8,13 @@ Read meta information from extZ (TXTZ, HTMLZ...) files.
 '''

 import os
+import posixpath

 from cStringIO import StringIO

 from calibre.ebooks.metadata import MetaInformation
-from calibre.ebooks.metadata.opf2 import OPF, metadata_to_opf
-from calibre.ptempfile import TemporaryDirectory
+from calibre.ebooks.metadata.opf2 import OPF
+from calibre.ptempfile import PersistentTemporaryFile
 from calibre.utils.zipfile import ZipFile, safe_replace

 def get_metadata(stream, extract_cover=True):
@ -23,16 +24,75 @@ def get_metadata(stream, extract_cover=True):
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

-    with TemporaryDirectory('_untxtz_mdata') as tdir:
    try:
-            zf = ZipFile(stream)
-            zf.extract('metadata.opf', tdir)
-            with open(os.path.join(tdir, 'metadata.opf'), 'rb') as opff:
-                mi = OPF(opff).to_book_metadata()
+        with ZipFile(stream) as zf:
+            opf_name = get_first_opf_name(zf)
+            opf_stream = StringIO(zf.read(opf_name))
+            opf = OPF(opf_stream)
+            mi = opf.to_book_metadata()
+            if extract_cover:
+                cover_name = opf.raster_cover
+                if cover_name:
+                    mi.cover_data = ('jpg', zf.read(cover_name))
    except:
        return mi
    return mi

 def set_metadata(stream, mi):
-    opf = StringIO(metadata_to_opf(mi))
-    safe_replace(stream, 'metadata.opf', opf)
+    '''
+    Update the metadata (and the cover, when one is available) stored in the
+    extZ archive ``stream``.
+
+    The first top-level OPF file in the archive is read, updated from ``mi``
+    and written back. New cover data is taken from ``mi.cover_data[1]`` or,
+    failing that, read from the file at ``mi.cover``. It replaces the raster
+    cover referenced by the OPF, or is stored as ``cover.jpg`` next to the
+    OPF if the OPF references none.
+
+    Raises an Exception if the archive contains no top-level OPF file.
+    '''
+    replacements = {}
+    cover_tmp = None # Path of the temporary file holding the new cover
+
+    # Get the OPF in the archive.
+    with ZipFile(stream) as zf:
+        opf_path = get_first_opf_name(zf)
+        opf_stream = StringIO(zf.read(opf_path))
+    opf = OPF(opf_stream)
+
+    # Cover.
+    new_cdata = None
+    try:
+        new_cdata = mi.cover_data[1]
+        if not new_cdata:
+            raise Exception('no cover')
+    except Exception:
+        # No usable in-memory cover, fall back to the cover file, if any
+        try:
+            with open(mi.cover, 'rb') as f:
+                new_cdata = f.read()
+        except Exception:
+            pass
+
+    try:
+        if new_cdata:
+            raster_cover = opf.raster_cover or 'cover.jpg'
+            cpath = posixpath.join(posixpath.dirname(opf_path), raster_cover)
+            cover_tmp = _write_new_cover(new_cdata, cpath).name
+            replacements[cpath] = open(cover_tmp, 'rb')
+
+        # Update the metadata.
+        opf.smart_update(mi, replace_metadata=True)
+        newopf = StringIO(opf.render())
+        safe_replace(stream, opf_path, newopf, extra_replacements=replacements)
+    finally:
+        # Cleanup temporary files, even if updating the archive failed.
+        for f in replacements.values():
+            f.close()
+        if cover_tmp is not None:
+            try:
+                os.remove(cover_tmp)
+            except OSError:
+                pass
+
+def get_first_opf_name(zf):
+    '''
+    Return the name of the alphabetically first ``.opf`` file located at the
+    top level (no ``/`` in its name) of the zip file ``zf``.
+
+    Raises an Exception if there is no such file.
+    '''
+    top_level = [name for name in zf.namelist()
+            if name.endswith('.opf') and '/' not in name]
+    if not top_level:
+        raise Exception('No OPF found')
+    return min(top_level)
+
+def _write_new_cover(new_cdata, cpath):
+    '''
+    Save the cover data ``new_cdata`` to a persistent temporary file that has
+    the same extension as ``cpath`` and return the (closed) temporary file
+    object. The caller is responsible for deleting the file.
+    '''
+    from calibre.utils.magick.draw import save_cover_data_to
+    extension = os.path.splitext(cpath)[1]
+    tmp = PersistentTemporaryFile(suffix=extension)
+    # Only the name is needed, the data is written via the path below
+    tmp.close()
+    save_cover_data_to(new_cdata, tmp.name)
+    return tmp
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@ -327,6 +327,7 @@ class ResultList(list):
 def search(title=None, author=None, publisher=None, isbn=None,
           min_viewability='none', verbose=False, max_results=40):
    br   = browser()
+    br.set_handle_gzip(True)
    entries = Query(title=title, author=author, publisher=publisher,
                        isbn=isbn, max_results=max_results,
                            min_viewability=min_viewability)(br, verbose)
--- a/src/calibre/ebooks/metadata/mobi.py
+++ b/src/calibre/ebooks/metadata/mobi.py
@ -259,6 +259,7 @@ class MetadataUpdater(object):
        trail = len(new_record0.getvalue()) % 4
        pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte
        new_record0.write(pad)
+        new_record0.write('\0'*(1024*8))

        # Rebuild the stream, update the pdbrecords pointers
        self.patchSection(0,new_record0.getvalue())
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@ -23,7 +23,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.library.comments import sanitize_comments_html
 from calibre.utils.date import parse_date

-class Worker(Thread): # {{{
+class Worker(Thread): # Get details {{{

    '''
    Get book details from amazons book page in a separate thread
@ -218,6 +218,9 @@ class Worker(Thread): # {{{
                    ' @class="emptyClear" or @href]'):
                c.getparent().remove(c)
            desc = tostring(desc, method='html', encoding=unicode).strip()
+            # Encoding bug in Amazon data U+fffd (replacement char)
+            # in some examples it is present in place of '
+            desc = desc.replace('\ufffd', "'")
            # remove all attributes from tags
            desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
            # Collapse whitespace
@ -276,12 +279,15 @@ class Worker(Thread): # {{{

 class Amazon(Source):

-    name = 'Amazon'
+    name = 'Amazon.com'
    description = _('Downloads metadata from Amazon')

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
-        'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
+        'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate',
+        'language'])
+    has_html_comments = True
+    supports_gzip_transfer_encoding = True

    AMAZON_DOMAINS = {
            'com': _('US'),
@ -290,6 +296,14 @@ class Amazon(Source):
            'uk' : _('UK'),
    }

+    def get_book_url(self, identifiers): # {{{
+        '''
+        Return the short amzn.com URL for the book, built from the
+        ``amazon`` identifier or, if that is missing, the ``asin``
+        identifier. Returns None when no usable identifier is present.
+        '''
+        for key in ('amazon', 'asin'):
+            asin = identifiers.get(key, None)
+            if asin is not None:
+                break
+        if not asin:
+            return None
+        return 'http://amzn.com/%s'%asin
+    # }}}
+
    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
        domain = self.prefs.get('domain', 'com')

@ -328,9 +342,10 @@ class Amazon(Source):
            # Insufficient metadata to make an identify query
            return None

-        utf8q = dict([(x.encode('utf-8'), y.encode('utf-8')) for x, y in
+        latin1q = dict([(x.encode('latin1', 'ignore'), y.encode('latin1',
+            'ignore')) for x, y in
            q.iteritems()])
-        url = 'http://www.amazon.%s/s/?'%domain + urlencode(utf8q)
+        url = 'http://www.amazon.%s/s/?'%domain + urlencode(latin1q)
        return url

    # }}}
@ -408,6 +423,18 @@ class Amazon(Source):
                    if 'bulk pack' not in title:
                        matches.append(a.get('href'))
                    break
+            if not matches:
+                # This can happen for some user agents that Amazon thinks are
+                # mobile/less capable
+                log('Trying alternate results page markup')
+                for td in root.xpath(
+                    r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
+                    for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
+                        title = tostring(a, method='text', encoding=unicode).lower()
+                        if 'bulk pack' not in title:
+                            matches.append(a.get('href'))
+                        break
+

        # Keep only the top 5 matches as the matches are sorted by relevance by
        # Amazon so lower matches are not likely to be very relevant
@ -476,9 +503,10 @@ class Amazon(Source):
        if abort.is_set():
            return
        br = self.browser
+        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
-            result_queue.put(cdata)
+            result_queue.put((self, cdata))
        except:
            log.exception('Failed to download cover from:', cached_url)
    # }}}
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@ -15,9 +15,21 @@ from calibre.customize import Plugin
 from calibre.utils.logging import ThreadSafeLog, FileStream
 from calibre.utils.config import JSONConfig
 from calibre.utils.titlecase import titlecase
+from calibre.utils.icu import capitalize, lower
 from calibre.ebooks.metadata import check_isbn

-msprefs = JSONConfig('metadata_sources.json')
+msprefs = JSONConfig('metadata_sources/global.json')
+msprefs.defaults['txt_comments'] = False
+msprefs.defaults['ignore_fields'] = []
+msprefs.defaults['max_tags'] = 20
+msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
+msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds
+msprefs.defaults['swap_author_names'] = False
+
+# Google covers are often poor quality (scans/errors) but they have high
+# resolution, so they trump covers from better sources. So make sure they
+# are only used if no other covers are found.
+msprefs.defaults['cover_priorities'] = {'Google':2}

 def create_log(ostream=None):
    log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
@ -67,8 +79,8 @@ class InternalMetadataCompareKeyGen(object):
        exact_title = 1 if title and \
                cleanup_title(title) == cleanup_title(mi.title) else 2

-        has_cover = 2 if source_plugin.get_cached_cover_url(mi.identifiers)\
-                is None else 1
+        has_cover = 2 if (not source_plugin.cached_cover_url_is_reliable or
+                source_plugin.get_cached_cover_url(mi.identifiers) is None) else 1

        self.base = (isbn, has_cover, all_fields, exact_title)
        self.comments_len = len(mi.comments.strip() if mi.comments else '')
@ -89,6 +101,54 @@ class InternalMetadataCompareKeyGen(object):

 # }}}

+def get_cached_cover_urls(mi):
+    '''
+    Generator yielding a ``(plugin, url)`` pair for every metadata plugin
+    with the ``identify`` capability that has a cached cover URL for the
+    identifiers of ``mi``.
+    '''
+    from calibre.customize.ui import metadata_plugins
+    identify_plugins = list(metadata_plugins(['identify']))
+    for plugin in identify_plugins:
+        cached_url = plugin.get_cached_cover_url(mi.identifiers)
+        if not cached_url:
+            continue
+        yield (plugin, cached_url)
+
+def cap_author_token(token):
+    '''
+    Return a single whitespace delimited piece of an author name with its
+    capitalization normalized.
+
+    The particles von, de, el, van and le are returned in lower case.
+    Runs of two or more dotted initials such as J.K. are capitalized and
+    separated by spaces (J. K.). Everything else is simply capitalized.
+    '''
+    lowered = lower(token)
+    if lowered in ('von', 'de', 'el', 'van', 'le'):
+        return lowered
+    is_initials = re.match(r'([a-z]\.){2,}$', lowered) is not None
+    if not is_initials:
+        return capitalize(token)
+    # Normalize tokens of the form J.K. to J. K.
+    initials = [capitalize(part) for part in token.split('.')]
+    return '. '.join(initials).strip()
+
+def fixauthors(authors):
+    '''
+    Return a new list with the capitalization of every author name in
+    ``authors`` normalized token by token via :func:`cap_author_token`.
+    A false value (None or an empty list) is returned unchanged.
+    '''
+    if not authors:
+        return authors
+    return [' '.join(cap_author_token(tok) for tok in author.split())
+            for author in authors]
+
+def fixcase(x):
+    '''
+    Return ``x`` converted to title case. False values (None, empty
+    strings) are returned unchanged.
+    '''
+    return titlecase(x) if x else x
+
+class Option(object):
+    '''
+    Plain record describing one user configurable setting of a metadata
+    source plugin.
+    '''
+    __slots__ = ['type', 'default', 'label', 'desc', 'name', 'choices']
+
+    def __init__(self, name, type_, default, label, desc, choices=None):
+        '''
+        :param name: The name of this option. Must be a valid python identifier
+        :param type_: The type of this option, one of ('number', 'string',
+                        'bool', 'choices')
+        :param default: The default value for this option
+        :param label: A short (few words) description of this option
+        :param desc: A longer description of this option
+        :param choices: A list of possible values, used only if type='choices'
+        '''
+        self.name = name
+        self.type = type_
+        self.default = default
+        self.label = label
+        self.desc = desc
+        self.choices = choices
+
 class Source(Plugin):

    type = _('Metadata source')
@ -104,6 +164,29 @@ class Source(Plugin):
    #: during the identify phase
    touched_fields = frozenset()

+    #: Set this to True if your plugin returns HTML formatted comments
+    has_html_comments = False
+
+    #: Setting this to True means that the browser object will add
+    #: Accept-Encoding: gzip to all requests. This can speedup downloads
+    #: but make sure that the source actually supports gzip transfer encoding
+    #: correctly first
+    supports_gzip_transfer_encoding = False
+
+    #: Cached cover URLs can sometimes be unreliable (i.e. the download could
+    #: fail or the returned image could be bogus). If that is often the case
+    #: with this source, set this to False
+    cached_cover_url_is_reliable = True
+
+    #: A list of :class:`Option` objects. They will be used to automatically
+    #: construct the configuration widget for this plugin
+    options = ()
+
+    #: A string that is displayed at the top of the config widget for this
+    #: plugin
+    config_help_message = None
+
+
    def __init__(self, *args, **kwargs):
        Plugin.__init__(self, *args, **kwargs)
        self._isbn_to_identifier_cache = {}
@ -111,9 +194,29 @@ class Source(Plugin):
        self.cache_lock = threading.RLock()
        self._config_obj = None
        self._browser = None
+        self.prefs.defaults['ignore_fields'] = []
+        for opt in self.options:
+            self.prefs.defaults[opt.name] = opt.default

    # Configuration {{{

+    def is_configured(self):
+        '''
+        Return False if your plugin needs to be configured before it can be
+        used. For example, it might need a username/password/API key.
+
+        This default implementation always returns True.
+        '''
+        return True
+
+    def is_customizable(self):
+        '''
+        Always returns True for this class.
+        '''
+        return True
+
+    def config_widget(self):
+        '''
+        Create and return a ``ConfigWidget`` for this plugin. The GUI module
+        is imported here, only when the widget is actually requested.
+        '''
+        from calibre.gui2.metadata.config import ConfigWidget
+        return ConfigWidget(self)
+
+    def save_settings(self, config_widget):
+        '''
+        Save the settings by calling ``commit()`` on ``config_widget``.
+        '''
+        config_widget.commit()
+
    @property
    def prefs(self):
        if self._config_obj is None:
@ -127,6 +230,8 @@ class Source(Plugin):
    def browser(self):
        if self._browser is None:
            self._browser = browser(user_agent=random_user_agent())
+            if self.supports_gzip_transfer_encoding:
+                self._browser.set_handle_gzip(True)
        return self._browser.clone_browser()

    # }}}
@ -229,13 +334,9 @@ class Source(Plugin):
        before putting the Metadata object into result_queue. You can of
        course, use a custom algorithm suited to your metadata source.
        '''
-        def fixcase(x):
-            if x:
-                x = titlecase(x)
-            return x
        if mi.title:
            mi.title = fixcase(mi.title)
-        mi.authors = list(map(fixcase, mi.authors))
+        mi.authors = fixauthors(mi.authors)
        mi.tags = list(map(fixcase, mi.tags))
        mi.isbn = check_isbn(mi.isbn)

@ -243,6 +344,13 @@ class Source(Plugin):

    # Metadata API {{{

+    def get_book_url(self, identifiers):
+        '''
+        Return the URL for the book identified by identifiers at this source.
+        If no URL is found, return None.
+
+        This default implementation always returns None.
+        '''
+        return None
+
    def get_cached_cover_url(self, identifiers):
        '''
        Return cached cover URL for the book identified by
@ -316,7 +424,8 @@ class Source(Plugin):
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download a cover and put it into result_queue. The parameters all have
-        the same meaning as for :meth:`identify`.
+        the same meaning as for :meth:`identify`. Put (self, cover_data) into
+        result_queue.

        This method should use cached cover URLs for efficiency whenever
        possible. When cached data is not present, most plugins simply call
--- a/src/calibre/ebooks/metadata/sources/cli.py
+++ b/src/calibre/ebooks/metadata/sources/cli.py
@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import sys, textwrap
+from io import BytesIO
+from threading import Event
+
+from calibre import prints
+from calibre.utils.config import OptionParser
+from calibre.utils.magick.draw import save_cover_data_to
+from calibre.ebooks.metadata import string_to_authors
+from calibre.ebooks.metadata.opf2 import metadata_to_opf
+from calibre.ebooks.metadata.sources.base import create_log
+from calibre.ebooks.metadata.sources.identify import identify
+from calibre.ebooks.metadata.sources.covers import download_cover
+from calibre.utils.config import test_eight_code
+
+def option_parser():
+    '''
+    Build the command line option parser for the metadata download tool.
+
+    While ``test_eight_code`` is false, the parser of the older
+    ``calibre.ebooks.metadata.fetch`` module is returned instead.
+    '''
+    if not test_eight_code:
+        from calibre.ebooks.metadata.fetch import option_parser as legacy
+        return legacy()
+
+    usage = textwrap.dedent(
+        '''\
+        %prog [options]
+
+        Fetch book metadata from online sources. You must specify at least one
+        of title, authors or ISBN.
+        '''
+    )
+    parser = OptionParser(usage)
+
+    # (flags, keyword arguments) in the order they appear in the help output
+    specs = (
+        (('-t', '--title'), dict(help='Book title')),
+        (('-a', '--authors'), dict(help='Book author(s)')),
+        (('-i', '--isbn'), dict(help='Book ISBN')),
+        (('-v', '--verbose'), dict(default=False, action='store_true',
+            help='Print the log to the console (stderr)')),
+        (('-o', '--opf'), dict(help='Output the metadata in OPF format')),
+        (('-c', '--cover'), dict(
+            help='Specify a filename. The cover, if available, will be saved to it')),
+        (('-d', '--timeout'), dict(default='30',
+            help='Timeout in seconds. Default is 30')),
+    )
+    for flags, kwargs in specs:
+        parser.add_option(*flags, **kwargs)
+
+    return parser
+
+def main(args=sys.argv):
+    '''
+    Command line entry point: identify a book from the title, authors
+    and/or ISBN given in ``args`` and print the best match to stdout.
+
+    The match is printed as OPF if the ``--opf`` option is set, otherwise as
+    UTF-8 encoded text. If ``--cover`` is given, a cover is downloaded and
+    saved to that filename. With ``--verbose`` the download log is printed
+    to stderr.
+
+    Returns 0 on success. Raises SystemExit(1) if nothing was found. While
+    ``test_eight_code`` is false, everything is delegated to the older
+    ``calibre.ebooks.metadata.fetch`` module.
+    '''
+    if not test_eight_code:
+        from calibre.ebooks.metadata.fetch import main
+        return main(args)
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+
+    # All log output is captured in buf and only shown on request/failure
+    buf = BytesIO()
+    log = create_log(buf)
+    abort = Event()
+
+    authors = []
+    if opts.authors:
+        authors = string_to_authors(opts.authors)
+
+    identifiers = {}
+    if opts.isbn:
+        identifiers['isbn'] = opts.isbn
+
+    results = identify(log, abort, title=opts.title, authors=authors,
+            identifiers=identifiers, timeout=int(opts.timeout))
+
+    if not results:
+        # Print the captured log text, not the logger object itself
+        print (buf.getvalue(), file=sys.stderr)
+        prints('No results found', file=sys.stderr)
+        raise SystemExit(1)
+    result = results[0]
+
+    cf = None
+    if opts.cover:
+        cover = download_cover(log, title=opts.title, authors=authors,
+                identifiers=result.identifiers, timeout=int(opts.timeout))
+        if cover is None:
+            prints('No cover found', file=sys.stderr)
+        else:
+            # The image data is the last element of the cover tuple
+            save_cover_data_to(cover[-1], opts.cover)
+            result.cover = cf = opts.cover
+
+
+    log = buf.getvalue()
+
+
+    result = (metadata_to_opf(result) if opts.opf else
+                    unicode(result).encode('utf-8'))
+
+    if opts.verbose:
+        print (log, file=sys.stderr)
+
+    print (result)
+    if not opts.opf and opts.cover:
+        prints('Cover               :', cf)
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
--- a/src/calibre/ebooks/metadata/sources/covers.py
+++ b/src/calibre/ebooks/metadata/sources/covers.py
@ -0,0 +1,183 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import time
+from Queue import Queue, Empty
+from threading import Thread, Event
+from io import BytesIO
+
+from calibre.customize.ui import metadata_plugins
+from calibre.ebooks.metadata.sources.base import msprefs, create_log
+from calibre.utils.magick.draw import Image, save_cover_data_to
+
+class Worker(Thread):
+    '''
+    Daemon thread that asks a single plugin to download a cover.
+
+    The plugin's log output is captured in ``self.buf``. After the thread
+    has run, ``self.time_spent`` holds the elapsed time in seconds. It
+    stays None until then.
+    '''
+
+    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
+        Thread.__init__(self)
+        self.daemon = True
+
+        self.plugin = plugin
+        self.abort = abort
+        self.buf = BytesIO()
+        self.log = create_log(self.buf)
+        self.title = title
+        self.authors = authors
+        self.identifiers = identifiers
+        self.timeout = timeout
+        self.rq = rq
+        self.time_spent = None
+
+    def run(self):
+        began = time.time()
+        if not self.abort.is_set():
+            query = dict(title=self.title, authors=self.authors,
+                    identifiers=self.identifiers, timeout=self.timeout)
+            try:
+                self.plugin.download_cover(self.log, self.rq, self.abort,
+                        **query)
+            except:
+                # A misbehaving plugin must not take down the thread
+                self.log.exception('Failed to download cover from',
+                        self.plugin.name)
+        self.time_spent = time.time() - began
+
+def is_worker_alive(workers):
+    '''
+    Return True if at least one of the threads in ``workers`` is still
+    running, False otherwise.
+    '''
+    return any(worker.is_alive() for worker in workers)
+
+def process_result(log, result):
+    '''
+    Validate and normalize one ``(plugin, data)`` pair produced by a cover
+    download.
+
+    The image is loaded, trimmed and re-encoded. Returns the tuple
+    ``(plugin, width, height, fmt, data)``, or None if the data cannot be
+    processed or the image is smaller than 50 pixels in either dimension.
+    Failures are logged.
+    '''
+    source, raw = result
+    try:
+        img = Image()
+        img.load(raw)
+        img.trim(10)
+        width, height = img.size
+        fmt = img.format
+
+        too_small = width < 50 or height < 50
+        if too_small:
+            raise ValueError('Image too small')
+        encoded = save_cover_data_to(img, '/cover.jpg', return_data=True)
+    except:
+        log.exception('Invalid cover from', source.name)
+        return None
+    return (source, width, height, fmt, encoded)
+
+def run_download(log, results, abort,
+        title=None, authors=None, identifiers={}, timeout=30):
+    '''
+    Run the cover download, putting results into the queue :param:`results`.
+
+    Each result is a tuple of the form:
+
+        (plugin, width, height, fmt, bytes)
+
+    One worker thread is started for every configured plugin with the
+    ``cover`` capability. The function returns when all workers have
+    finished, when :param:`abort` is set, or when more than
+    ``msprefs['wait_after_first_cover_result']`` seconds have passed since
+    the first valid cover arrived (in which case :param:`abort` is set
+    here). A summary for every plugin is written to :param:`log`.
+    '''
+    if title == _('Unknown'):
+        title = None
+    if authors == [_('Unknown')]:
+        authors = None
+
+    plugins = [p for p in metadata_plugins(['cover']) if p.is_configured()]
+
+    rq = Queue()
+    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
+            in plugins]
+    for w in workers:
+        w.start()
+
+    first_result_at = None
+    wait_time = msprefs['wait_after_first_cover_result']
+    found_results = {}
+
+    while True:
+        time.sleep(0.1)
+        try:
+            x = rq.get_nowait()
+            result = process_result(log, x)
+            if result is not None:
+                results.put(result)
+                found_results[result[0]] = result
+                # Record only the arrival of the *first* valid cover, the
+                # timeout below is measured from that moment.
+                if first_result_at is None:
+                    first_result_at = time.time()
+        except Empty:
+            pass
+
+        if not is_worker_alive(workers):
+            break
+
+        if first_result_at is not None and time.time() - first_result_at > wait_time:
+            log('Not waiting for any more results')
+            abort.set()
+
+        if abort.is_set():
+            break
+
+    # Collect whatever is still sitting in the queue
+    while True:
+        try:
+            x = rq.get_nowait()
+            result = process_result(log, x)
+            if result is not None:
+                results.put(result)
+                found_results[result[0]] = result
+        except Empty:
+            break
+
+    # Append a per plugin summary, followed by the plugin's own log output
+    for w in workers:
+        wlog = w.buf.getvalue().strip()
+        log('\n'+'*'*30, w.plugin.name, 'Covers', '*'*30)
+        log('Request extra headers:', w.plugin.browser.addheaders)
+        if w.plugin in found_results:
+            result = found_results[w.plugin]
+            log('Downloaded cover:', '%dx%d'%(result[1], result[2]))
+        else:
+            log('Failed to download valid cover')
+        if w.time_spent is None:
+            # The worker never reached the end of its run() method
+            log('Download aborted')
+        else:
+            log('Took', w.time_spent, 'seconds')
+        if wlog:
+            log(wlog)
+        log('\n'+'*'*80)
+
+
+def download_cover(log,
+        title=None, authors=None, identifiers={}, timeout=30):
+    '''
+    Synchronous cover download. Returns the "best" cover as per user
+    prefs/cover resolution.
+
+    Returned cover is a tuple: (plugin, width, height, fmt, data)
+
+    Covers are ranked first by the priority configured for their plugin in
+    ``msprefs['cover_priorities']`` (lower is better, default 1) and then
+    by pixel count (larger is better).
+
+    Returns None if no cover is found.
+    '''
+    queue = Queue()
+    abort = Event()
+
+    run_download(log, queue, abort, title=title, authors=authors,
+            identifiers=identifiers, timeout=timeout)
+
+    # Drain the queue into a plain list
+    candidates = []
+    try:
+        while True:
+            candidates.append(queue.get_nowait())
+    except Empty:
+        pass
+
+    priorities = msprefs['cover_priorities']
+
+    def rank(candidate):
+        plugin, width, height, fmt, data = candidate
+        return (priorities.get(plugin.name, 1), 1/(width*height))
+
+    if not candidates:
+        return None
+    return min(candidates, key=rank)
+
+
+
+
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

-import time
+import time, hashlib
 from urllib import urlencode
 from functools import partial
 from Queue import Queue, Empty
@ -133,7 +133,7 @@ def to_metadata(browser, log, entry_, timeout): # {{{
            default = utcnow().replace(day=15)
            mi.pubdate = parse_date(pubdate, assume_utc=True, default=default)
        except:
-            log.exception('Failed to parse pubdate')
+            log.error('Failed to parse pubdate %r'%pubdate)

    # Ratings
    for x in rating(extra):
@ -145,24 +145,37 @@ def to_metadata(browser, log, entry_, timeout): # {{{
            log.exception('Failed to parse rating')

    # Cover
-    mi.has_google_cover = len(extra.xpath(
-        '//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0
+    mi.has_google_cover = None
+    for x in extra.xpath(
+            '//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'):
+        mi.has_google_cover = x.get('href')
+        break

    return mi
 # }}}

 class GoogleBooks(Source):

-    name = 'Google Books'
+    name = 'Google'
    description = _('Downloads metadata from Google Books')

    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
        'comments', 'publisher', 'identifier:isbn', 'rating',
        'identifier:google']) # language currently disabled
+    supports_gzip_transfer_encoding = True
+    cached_cover_url_is_reliable = False

    GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'

+    DUMMY_IMAGE_MD5 = frozenset(['0de4383ebad0adad5eeb8975cd796657'])
+
+    def get_book_url(self, identifiers): # {{{
+        '''
+        Return the Google Books URL built from the ``google`` identifier, or
+        None if that identifier is missing.
+        '''
+        volume_id = identifiers.get('google', None)
+        if volume_id is None:
+            return None
+        return 'http://books.google.com/books?id=%s'%volume_id
+    # }}}
+
    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
        BASE_URL = 'http://books.google.com/books/feeds/volumes?'
        isbn = check_isbn(identifiers.get('isbn', None))
@ -212,7 +225,7 @@ class GoogleBooks(Source):
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
-                cached_url = self.cover_url_from_identifiers(mi.identifiers)
+                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
@ -222,9 +235,14 @@ class GoogleBooks(Source):
        if abort.is_set():
            return
        br = self.browser
+        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
-            result_queue.put(cdata)
+            if cdata:
+                if hashlib.md5(cdata).hexdigest() in self.DUMMY_IMAGE_MD5:
+                    log.warning('Google returned a dummy image, ignoring')
+                else:
+                    result_queue.put((self, cdata))
        except:
            log.exception('Failed to download cover from:', cached_url)

@ -270,6 +288,9 @@ class GoogleBooks(Source):
            identifiers={}, timeout=30):
        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
+        if not query:
+            log.error('Insufficient metadata to construct query')
+            return
        br = self.browser
        try:
            raw = br.open_novisit(query, timeout=timeout).read()
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@ -8,17 +8,21 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import time
+from datetime import datetime
 from Queue import Queue, Empty
 from threading import Thread
 from io import BytesIO
+from operator import attrgetter

-from calibre.customize.ui import metadata_plugins
-from calibre.ebooks.metadata.sources.base import create_log
+from calibre.customize.ui import metadata_plugins, all_metadata_plugins
+from calibre.ebooks.metadata.sources.base import create_log, msprefs
 from calibre.ebooks.metadata.xisbn import xisbn
+from calibre.ebooks.metadata.book.base import Metadata
+from calibre.utils.date import utc_tz
+from calibre.utils.html2text import html2text
+from calibre.utils.icu import lower

-# How long to wait for more results after first result is found
-WAIT_AFTER_FIRST_RESULT = 30 # seconds
-
+# Download worker {{{
 class Worker(Thread):

    def __init__(self, plugin, kwargs, abort):
@ -31,10 +35,12 @@ class Worker(Thread):
        self.log = create_log(self.buf)

    def run(self):
+        start = time.time()
        try:
            self.plugin.identify(self.log, self.rq, self.abort, **self.kwargs)
        except:
            self.log.exception('Plugin', self.plugin.name, 'failed')
+        self.plugin.dl_time_spent = time.time() - start

 def is_worker_alive(workers):
    for w in workers:
@ -42,9 +48,217 @@ def is_worker_alive(workers):
            return True
    return False

-def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
+# }}}
+
+# Merge results from different sources {{{
+
+class ISBNMerge(object):
+
+    def __init__(self):
+        self.pools = {}
+        self.isbnless_results = []
+
+    def isbn_in_pool(self, isbn):
+        if isbn:
+            for isbns, pool in self.pools.iteritems():
+                if isbn in isbns:
+                    return pool
+        return None
+
+    def pool_has_result_from_same_source(self, pool, result):
+        results = pool[1]
+        for r in results:
+            if r.identify_plugin is result.identify_plugin:
+                return True
+        return False
+
+    def add_result(self, result):
+        isbn = result.isbn
+        if isbn:
+            pool = self.isbn_in_pool(isbn)
+            if pool is None:
+                isbns, min_year = xisbn.get_isbn_pool(isbn)
+                if not isbns:
+                    isbns = frozenset([isbn])
+                self.pools[isbns] = pool = (min_year, [])
+
+            if not self.pool_has_result_from_same_source(pool, result):
+                pool[1].append(result)
+        else:
+            self.isbnless_results.append(result)
+
+    def finalize(self):
+        ' Merge the collected results and return them sorted by relevance '
+        # A pool is a (min_year, results) tuple, which is always truthy, so
+        # it is the results list inside the pool that has to be tested
+        has_isbn_result = any(
+            results for min_year, results in self.pools.itervalues())
+        self.has_isbn_result = has_isbn_result
+
+        if has_isbn_result:
+            self.merge_isbn_results()
+        else:
+            results = sorted(self.isbnless_results,
+                    key=attrgetter('relevance_in_source'))
+            # Pick only the most relevant result from each source
+            self.results = []
+            seen = set()
+            for result in results:
+                if result.identify_plugin not in seen:
+                    seen.add(result.identify_plugin)
+                    self.results.append(result)
+                    result.average_source_relevance = \
+                        result.relevance_in_source
+
+        self.merge_metadata_results()
+
+        return self.results
+
+    def merge_metadata_results(self):
+        ' Merge results with identical title and authors '
+        groups = {}
+        for result in self.results:
+            title = lower(result.title if result.title else '')
+            key = (title, tuple([lower(x) for x in result.authors]))
+            if key not in groups:
+                groups[key] = []
+            groups[key].append(result)
+
+        if len(groups) != len(self.results):
+            self.results = []
+            for rgroup in groups.itervalues():
+                rel = [r.average_source_relevance for r in rgroup]
+                if len(rgroup) > 1:
+                    result = self.merge(rgroup, None, do_asr=False)
+                    result.average_source_relevance = sum(rel)/len(rel)
+                else:
+                    result = rgroup[0]
+                self.results.append(result)
+
+        self.results.sort(key=attrgetter('average_source_relevance'))
+
+    def merge_isbn_results(self):
+        self.results = []
+        for min_year, results in self.pools.itervalues():
+            if results:
+                self.results.append(self.merge(results, min_year))
+
+        self.results.sort(key=attrgetter('average_source_relevance'))
+
+    def length_merge(self, attr, results, null_value=None, shortest=True):
+        values = [getattr(x, attr) for x in results if not x.is_null(attr)]
+        values = [x for x in values if len(x) > 0]
+        if not values:
+            return null_value
+        values.sort(key=len, reverse=not shortest)
+        return values[0]
+
+    def random_merge(self, attr, results, null_value=None):
+        values = [getattr(x, attr) for x in results if not x.is_null(attr)]
+        return values[0] if values else null_value
+
+    def merge(self, results, min_year, do_asr=True):
+        ans = Metadata(_('Unknown'))
+
+        # We assume the shortest title has the least cruft in it
+        ans.title = self.length_merge('title', results, null_value=ans.title)
+
+        # No harm in having extra authors, maybe something useful like an
+        # editor or translator
+        ans.authors = self.length_merge('authors', results,
+                null_value=ans.authors, shortest=False)
+
+        # We assume the shortest publisher has the least cruft in it
+        ans.publisher = self.length_merge('publisher', results,
+                null_value=ans.publisher)
+
+        # We assume the smallest set of tags has the least cruft in it
+        ans.tags = self.length_merge('tags', results,
+                null_value=ans.tags)
+
+        # We assume the longest series has the most info in it
+        ans.series = self.length_merge('series', results,
+                null_value=ans.series, shortest=False)
+        for r in results:
+            if r.series and r.series == ans.series:
+                ans.series_index = r.series_index
+                break
+
+        # Average the rating over all sources
+        ratings = []
+        for r in results:
+            rating = r.rating
+            if rating and rating > 0 and rating <= 5:
+                ratings.append(rating)
+        if ratings:
+            ans.rating = sum(ratings)/len(ratings)
+
+        # Smallest language is likely to be valid
+        ans.language = self.length_merge('language', results,
+                null_value=ans.language)
+
+        # Choose longest comments
+        ans.comments = self.length_merge('comments', results,
+                null_value=ans.comments, shortest=False)
+
+        # Published date
+        if min_year:
+            min_date = datetime(min_year, 1, 2, tzinfo=utc_tz)
+            ans.pubdate = min_date
+        else:
+            min_date = datetime(3001, 1, 1, tzinfo=utc_tz)
+            for r in results:
+                if r.pubdate is not None and r.pubdate < min_date:
+                    min_date = r.pubdate
+            if min_date.year < 3000:
+                ans.pubdate = min_date
+
+        # Identifiers
+        for r in results:
+            ans.identifiers.update(r.identifiers)
+
+        # Cover URL
+        ans.has_cached_cover_url = bool([r for r in results if
+            getattr(r, 'has_cached_cover_url', False)])
+
+        # Merge any other fields with no special handling (random merge)
+        touched_fields = set()
+        for r in results:
+            if hasattr(r, 'identify_plugin'):
+                touched_fields |= r.identify_plugin.touched_fields
+
+        for f in touched_fields:
+            if f.startswith('identifier:') or not ans.is_null(f):
+                continue
+            setattr(ans, f, self.random_merge(f, results,
+                null_value=getattr(ans, f)))
+
+        if do_asr:
+            avg = [x.relevance_in_source for x in results]
+            avg = sum(avg)/len(avg)
+            ans.average_source_relevance = avg
+
+        return ans
+
+
+def merge_identify_results(result_map, log):
+    isbn_merge = ISBNMerge()
+    for plugin, results in result_map.iteritems():
+        for result in results:
+            isbn_merge.add_result(result)
+
+    return isbn_merge.finalize()
+
+# }}}
+
+def identify(log, abort, # {{{
+        title=None, authors=None, identifiers={}, timeout=30):
+    if title == _('Unknown'):
+        title = None
+    if authors == [_('Unknown')]:
+        authors = None
    start_time = time.time()
-    plugins = list(metadata_plugins['identify'])
+    plugins = [p for p in metadata_plugins(['identify']) if p.is_configured()]

    kwargs = {
        'title': title,
@ -56,14 +270,17 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
    log('Running identify query with parameters:')
    log(kwargs)
    log('Using plugins:', ', '.join([p.name for p in plugins]))
-    log('The log (if any) from individual plugins is below')
+    log('The log from individual plugins is below')

    workers = [Worker(p, kwargs, abort) for p in plugins]
    for w in workers:
        w.start()

    first_result_at = None
-    results = dict.fromkeys(plugins, [])
+    results = {}
+    for p in plugins:
+        results[p] = []
+    logs = dict([(w.plugin, w.buf) for w in workers])

    def get_results():
        found = False
@ -77,6 +294,7 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
                found = True
        return found

+    wait_time = msprefs['wait_after_first_identify_result']
    while True:
        time.sleep(0.2)

@ -86,76 +304,144 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
        if not is_worker_alive(workers):
            break

-        if (first_result_at is not None and time.time() - first_result_at <
-                WAIT_AFTER_FIRST_RESULT):
+        if (first_result_at is not None and time.time() - first_result_at >
+                wait_time):
            log('Not waiting any longer for more results')
            abort.set()
            break

-    get_results()
+    while not abort.is_set() and get_results():
+        pass
+
    sort_kwargs = dict(kwargs)
    for k in list(sort_kwargs.iterkeys()):
        if k not in ('title', 'authors', 'identifiers'):
            sort_kwargs.pop(k)

-    for plugin, results in results.iteritems():
-        results.sort(key=plugin.identify_results_keygen(**sort_kwargs))
-        plog = plugin.buf.getvalue().strip()
+    longest, lp = -1, ''
+    for plugin, presults in results.iteritems():
+        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
+        plog = logs[plugin].getvalue().strip()
+        log('\n'+'*'*30, plugin.name, '*'*30)
+        log('Request extra headers:', plugin.browser.addheaders)
+        log('Found %d results'%len(presults))
+        time_spent = getattr(plugin, 'dl_time_spent', None)
+        if time_spent is None:
+            log('Downloading was aborted')
+            longest, lp = -1, plugin.name
+        else:
+            log('Downloading from', plugin.name, 'took', time_spent)
+            if time_spent > longest:
+                longest, lp = time_spent, plugin.name
+        for r in presults:
+            log('\n\n---')
+            log(unicode(r))
        if plog:
-            log('\n'+'*'*35, plugin.name, '*'*35)
-            log('Found %d results'%len(results))
            log(plog)
        log('\n'+'*'*80)

-        for i, result in enumerate(results):
+        for i, result in enumerate(presults):
            result.relevance_in_source = i
-            result.has_cached_cover_url = \
-                plugin.get_cached_cover_url(result.identifiers) is not None
+            result.has_cached_cover_url = (plugin.cached_cover_url_is_reliable
+                    and plugin.get_cached_cover_url(result.identifiers) is not
+                    None)
            result.identify_plugin = plugin

    log('The identify phase took %.2f seconds'%(time.time() - start_time))
+    log('The longest time (%f) was taken by:'%longest, lp)
    log('Merging results from different sources and finding earliest',
            'publication dates')
    start_time = time.time()
-    merged_results = merge_identify_results(results, log)
+    results = merge_identify_results(results, log)
    log('We have %d merged results, merging took: %.2f seconds' %
-            (len(merged_results), time.time() - start_time))
+            (len(results), time.time() - start_time))

-class ISBNMerge(object):
-
-    def __init__(self):
-        self.pools = {}
-
-    def isbn_in_pool(self, isbn):
-        if isbn:
-            for p in self.pools:
-                if isbn in p:
-                    return p
-        return None
-
-    def pool_has_result_from_same_source(self, pool, result):
-        results = self.pools[pool][1]
+    if msprefs['txt_comments']:
        for r in results:
-            if r.identify_plugin is result.identify_plugin:
-                return True
-        return False
+            if r.plugin.has_html_comments and r.comments:
+                r.comments = html2text(r.comments)

-    def add_result(self, result, isbn):
-        pool = self.isbn_in_pool(isbn)
-        if pool is None:
-            isbns, min_year = xisbn.get_isbn_pool(isbn)
-            if not isbns:
-                isbns = frozenset([isbn])
-            self.pool[isbns] = pool = (min_year, [])
+    max_tags = msprefs['max_tags']
+    for r in results:
+        r.tags = r.tags[:max_tags]

-        if not self.pool_has_result_from_same_source(pool, result):
-            pool[1].append(result)
+    if msprefs['swap_author_names']:
+        for r in results:
+            def swap_to_ln_fn(a):
+                if ',' in a:
+                    return a
+                parts = a.split(None)
+                if len(parts) <= 1:
+                    return a
+                surname = parts[-1]
+                return '%s, %s' % (surname, ' '.join(parts[:-1]))
+            r.authors = [swap_to_ln_fn(a) for a in r.authors]

-def merge_identify_results(result_map, log):
-    for plugin, results in result_map.iteritems():
-        for result in results:
-            isbn = result.isbn
+    return results
+# }}}
+
+def urls_from_identifiers(identifiers): # {{{
+    ans = []
+    for plugin in all_metadata_plugins():
+        try:
+            url = plugin.get_book_url(identifiers)
+            if url is not None:
+                ans.append((plugin.name, url))
+        except:
+            pass
+    isbn = identifiers.get('isbn', None)
    if isbn:
-                isbns, min_year = xisbn.get_isbn_pool(isbn)
+        ans.append(('ISBN',
+            'http://www.worldcat.org/search?q=bn%%3A%s&qt=advanced'%isbn))
+    return ans
+# }}}

+if __name__ == '__main__': # tests {{{
+    # To run these test use: calibre-debug -e
+    # src/calibre/ebooks/metadata/sources/identify.py
+    from calibre.ebooks.metadata.sources.test import (test_identify,
+            title_test, authors_test)
+    tests = [
+
+            ( # An e-book ISBN not on Amazon, one of the authors is
+              # unknown to Amazon
+                {'identifiers':{'isbn': '9780307459671'},
+                    'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
+                [title_test('The Invisible Gorilla',
+                    exact=True), authors_test(['Christopher F. Chabris', 'Daniel Simons'])]
+
+            ),
+
+            (  # Test absence of identifiers
+                {'title':'Learning Python',
+                    'authors':['Lutz']},
+                [title_test('Learning Python',
+                    exact=True), authors_test(['Mark J. Lutz', 'David Ascher'])
+                 ]
+
+            ),
+
+            ( # Sophisticated comment formatting
+                {'identifiers':{'isbn': '9781416580829'}},
+                [title_test('Angels & Demons',
+                    exact=True), authors_test(['Dan Brown'])]
+            ),
+
+            ( # No ISBN
+                {'title':'Justine', 'authors':['Durrel']},
+                [title_test('Justine', exact=True),
+                    authors_test(['Lawrence Durrel'])]
+            ),
+
+            (  # A newer book
+                {'identifiers':{'isbn': '9780316044981'}},
+                [title_test('The Heroes', exact=True),
+                    authors_test(['Joe Abercrombie'])]
+
+            ),
+
+        ]
+    #test_identify(tests[1:2])
+    test_identify(tests)
+# }}}

--- a/src/calibre/ebooks/metadata/sources/isbndb.py
+++ b/src/calibre/ebooks/metadata/sources/isbndb.py
@ -0,0 +1,246 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from urllib import quote
+
+from lxml import etree
+
+from calibre.ebooks.metadata import check_isbn
+from calibre.ebooks.metadata.sources.base import Source, Option
+from calibre.ebooks.chardet import xml_to_unicode
+from calibre.utils.cleantext import clean_ascii_chars
+from calibre.utils.icu import lower
+from calibre.ebooks.metadata.book.base import Metadata
+
+BASE_URL = 'http://isbndb.com/api/books.xml?access_key=%s&page_number=1&results=subjects,authors,texts&'
+
+
+class ISBNDB(Source):
+
+    name = 'ISBNDB'
+    description = _('Downloads metadata from isbndb.com')
+
+    capabilities = frozenset(['identify'])
+    touched_fields = frozenset(['title', 'authors',
+        'identifier:isbn', 'comments', 'publisher'])
+    supports_gzip_transfer_encoding = True
+    # Shortcut, since we have no cached cover URLS
+    cached_cover_url_is_reliable = False
+
+    options = ( # NOTE(review): presumably read back via prefs['isbndb_key']
+            Option('isbndb_key', 'string', None, _('IsbnDB key:'),
+                _('To use isbndb.com you have to sign up for a free account'
+                    ' at isbndb.com and get an access key.')),
+            )
+
+    config_help_message = '<p>'+_('To use metadata from isbndb.com you must sign'
+            ' up for a free account and get an isbndb key and enter it below.'
+            ' Instructions to get the key are '
+            '<a href="http://isbndb.com/docs/api/30-keys.html">here</a>.')
+
+
+    def __init__(self, *args, **kwargs):
+        Source.__init__(self, *args, **kwargs)
+
+        prefs = self.prefs
+        prefs.defaults['key_migrated'] = False
+        prefs.defaults['isbndb_key'] = None
+
+        if not prefs['key_migrated']:
+            prefs['key_migrated'] = True
+            try:
+                from calibre.customize.ui import config
+                key = config['plugin_customization']['IsbnDB']
+                prefs['isbndb_key'] = key
+            except:
+                pass
+
+    @property
+    def isbndb_key(self):
+        return self.prefs['isbndb_key']
+
+    def is_configured(self):
+        return self.isbndb_key is not None
+
+    def create_query(self, title=None, authors=None, identifiers={}): # {{{
+        ' Build the query URL, returns None if there is nothing to search for '
+        base_url = BASE_URL%self.isbndb_key
+        isbn = check_isbn(identifiers.get('isbn', None))
+        q = ''
+        if isbn is not None:
+            q = 'index1=isbn&value1='+isbn
+        elif title or authors:
+            tokens = list(self.get_title_tokens(title))
+            tokens += self.get_author_tokens(authors,
+                    only_first_author=True)
+            # quote() raises KeyError for non-ASCII unicode in Python 2, so
+            # encode the tokens to UTF-8 before quoting them
+            tokens = [quote(t.encode('utf-8') if isinstance(t, unicode) else t)
+                    for t in tokens]
+            q = 'index1=combined&value1=' + '+'.join(tokens)
+
+        if not q:
+            return None
+        if isinstance(q, unicode):
+            q = q.encode('utf-8')
+        return base_url + q
+    # }}}
+
+    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
+            identifiers={}, timeout=30):
+        if not self.is_configured():
+            return
+        query = self.create_query(title=title, authors=authors,
+                identifiers=identifiers)
+        if not query:
+            err = 'Insufficient metadata to construct query'
+            log.error(err)
+            return err
+
+        results = []
+        try:
+            results = self.make_query(query, abort, title=title, authors=authors,
+                    identifiers=identifiers, timeout=timeout)
+        except:
+            err = 'Failed to make query to ISBNDb, aborting.'
+            log.exception(err)
+            return err
+
+        if not results and identifiers.get('isbn', False) and title and authors and \
+                not abort.is_set():
+            return self.identify(log, result_queue, abort, title=title,
+                    authors=authors, timeout=timeout)
+
+        for result in results:
+            self.clean_downloaded_metadata(result)
+            result_queue.put(result)
+
+    def parse_feed(self, feed, seen, orig_title, orig_authors, identifiers):
+        ' Return (total_results, shown_results, matching Metadata list) '
+
+        def tostring(x):
+            if x is None:
+                return ''
+            return etree.tostring(x, method='text', encoding=unicode).strip()
+
+        orig_isbn = identifiers.get('isbn', None)
+        title_tokens = list(self.get_title_tokens(orig_title))
+        author_tokens = list(self.get_author_tokens(orig_authors))
+        results = []
+
+        def ismatch(title, authors):
+            authors = lower(' '.join(authors))
+            title = lower(title)
+            match = not title_tokens
+            for t in title_tokens:
+                if lower(t) in title:
+                    match = True
+                    break
+            amatch = not author_tokens
+            for a in author_tokens:
+                if lower(a) in authors:
+                    amatch = True
+                    break
+            return match and amatch
+
+        bl = feed.find('BookList')
+        if bl is None: # No book list, report the error text from the feed
+            err = tostring(feed.find('errormessage'))
+            raise ValueError('ISBNDb query failed:' + err)
+        total_results = int(bl.get('total_results'))
+        shown_results = int(bl.get('shown_results'))
+        for bd in bl.xpath('.//BookData'):
+            isbn = check_isbn(bd.get('isbn13', bd.get('isbn', None)))
+            if not isbn:
+                continue
+            if orig_isbn and isbn != orig_isbn:
+                continue
+            title = tostring(bd.find('Title'))
+            if not title:
+                continue
+            authors = []
+            for au in bd.xpath('.//Authors/Person'):
+                au = tostring(au)
+                if au: # Ignore Person elements that have no text
+                    if ',' in au:
+                        ln, sep, fn = au.partition(',')
+                        au = fn.strip() + ' ' + ln.strip()
+                    authors.append(au)
+            if not authors:
+                continue
+            comments = tostring(bd.find('Summary'))
+            if not comments:
+                # Require comments, since without them the result is useless
+                # anyway
+                continue
+            id_ = (title, tuple(authors))
+            if id_ in seen:
+                continue
+            seen.add(id_)
+            if not ismatch(title, authors):
+                continue
+            publisher = tostring(bd.find('PublisherText'))
+            if not publisher: publisher = None
+            if publisher and 'audio' in publisher.lower():
+                continue
+            mi = Metadata(title, authors)
+            mi.isbn = isbn
+            mi.publisher = publisher
+            mi.comments = comments
+            results.append(mi)
+        return total_results, shown_results, results
+
+    def make_query(self, q, abort, title=None, authors=None, identifiers={},
+            max_pages=10, timeout=30):
+        page_num = 1
+        parser = etree.XMLParser(recover=True, no_network=True)
+        br = self.browser
+
+        seen = set()
+
+        candidates = []
+        total_found = 0
+        while page_num <= max_pages and not abort.is_set():
+            url = q.replace('&page_number=1&', '&page_number=%d&'%page_num)
+            page_num += 1
+            raw = br.open_novisit(url, timeout=timeout).read()
+            feed = etree.fromstring(xml_to_unicode(clean_ascii_chars(raw),
+                strip_encoding_pats=True)[0], parser=parser)
+            total, found, results = self.parse_feed(
+                    feed, seen, title, authors, identifiers)
+            total_found += found
+            candidates += results
+            if total_found >= total or len(candidates) > 9:
+                break
+
+        return candidates
+    # }}}
+
+if __name__ == '__main__':
+    # To run these test use:
+    # calibre-debug -e src/calibre/ebooks/metadata/sources/isbndb.py
+    from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
+            title_test, authors_test)
+    test_identify_plugin(ISBNDB.name,
+        [
+
+
+            (
+                {'title':'Great Gatsby',
+                    'authors':['Fitzgerald']},
+                [title_test('The great gatsby', exact=True),
+                    authors_test(['F. Scott Fitzgerald'])]
+            ),
+
+            (
+                {'title': 'Flatland', 'authors':['Abbott']},
+                [title_test('Flatland', exact=False)]
+            ),
+    ])
+
--- a/src/calibre/ebooks/metadata/sources/openlibrary.py
+++ b/src/calibre/ebooks/metadata/sources/openlibrary.py
@ -12,7 +12,7 @@ from calibre.ebooks.metadata.sources.base import Source
 class OpenLibrary(Source):

    name = 'Open Library'
-    description = _('Downloads metadata from The Open Library')
+    description = _('Downloads covers from The Open Library')

    capabilities = frozenset(['cover'])

@ -26,7 +26,7 @@ class OpenLibrary(Source):
        br = self.browser
        try:
            ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read()
-            result_queue.put(ans)
+            result_queue.put((self, ans))
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                log.error('No cover for ISBN: %r found'%isbn)
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@ -14,7 +14,8 @@ from threading import Event
 from calibre.customize.ui import metadata_plugins
 from calibre import prints, sanitize_file_name2
 from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import create_log
+from calibre.ebooks.metadata.sources.base import (create_log,
+        get_cached_cover_urls)

 def isbn_test(isbn):
    isbn_ = check_isbn(isbn)
@ -45,8 +46,80 @@ def authors_test(authors):

    return test

+def init_test(tdir_name):
+    tdir = tempfile.gettempdir()
+    lf = os.path.join(tdir, tdir_name.replace(' ', '')+'_identify_test.txt')
+    log = create_log(open(lf, 'wb'))
+    abort = Event()
+    return tdir, lf, log, abort

-def test_identify_plugin(name, tests):
+def test_identify(tests): # {{{
+    '''
+    :param tests: List of 2-tuples. Each two tuple is of the form (args,
+                  test_funcs). args is a dict of keyword arguments to pass to
+                  the identify method. test_funcs are callables that accept a
+                  Metadata object and return True iff the object passes the
+                  test.
+    '''
+    from calibre.ebooks.metadata.sources.identify import identify
+
+    tdir, lf, log, abort = init_test('Full Identify')
+    prints('Log saved to', lf)
+
+    times = []
+
+    for kwargs, test_funcs in tests:
+        log('#'*80)
+        log('### Running test with:', kwargs)
+        log('#'*80)
+        prints('Running test with:', kwargs)
+        args = (log, abort)
+        start_time = time.time()
+        results = identify(*args, **kwargs)
+        total_time = time.time() - start_time
+        times.append(total_time)
+        if not results:
+            prints('identify failed to find any results')
+            break
+
+        prints('Found', len(results), 'matches:', end=' ')
+        prints('Smaller relevance means better match')
+
+        for i, mi in enumerate(results):
+            prints('*'*30, 'Relevance:', i, '*'*30)
+            prints(mi)
+            prints('\nCached cover URLs    :',
+                    [x[0].name for x in get_cached_cover_urls(mi)])
+            prints('*'*75, '\n\n')
+
+        possibles = []
+        for mi in results:
+            test_failed = False
+            for tfunc in test_funcs:
+                if not tfunc(mi):
+                    test_failed = True
+                    break
+            if not test_failed:
+                possibles.append(mi)
+
+        if not possibles:
+            prints('ERROR: No results that passed all tests were found')
+            prints('Log saved to', lf)
+            raise SystemExit(1)
+
+        if results[0] is not possibles[0]:
+            prints('Most relevant result failed the tests')
+            raise SystemExit(1)
+
+        log('\n\n')
+
+    prints('Average time per query', sum(times)/len(times))
+
+    prints('Full log is at:', lf)
+
+# }}}
+
+def test_identify_plugin(name, tests): # {{{
    '''
    :param name: Plugin name
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
@ -61,11 +134,9 @@ def test_identify_plugin(name, tests):
            plugin = x
            break
    prints('Testing the identify function of', plugin.name)
+    prints('Using extra headers:', plugin.browser.addheaders)

-    tdir = tempfile.gettempdir()
-    lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt')
-    log = create_log(open(lf, 'wb'))
-    abort = Event()
+    tdir, lf, log, abort = init_test(plugin.name)
    prints('Log saved to', lf)

    times = []
@ -147,11 +218,11 @@ def test_identify_plugin(name, tests):
                '')+'-%s-cover.jpg'%sanitize_file_name2(mi.title.replace(' ',
                    '_')))
            with open(cover, 'wb') as f:
-                f.write(cdata)
+                f.write(cdata[-1])

            prints('Cover downloaded to:', cover)

-            if len(cdata) < 10240:
+            if len(cdata[-1]) < 10240:
                prints('Downloaded cover too small')
                raise SystemExit(1)

@ -159,4 +230,5 @@ def test_identify_plugin(name, tests):

    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
+# }}}

--- a/src/calibre/ebooks/metadata/xisbn.py
+++ b/src/calibre/ebooks/metadata/xisbn.py
@ -73,7 +73,11 @@ class xISBN(object):

    def get_isbn_pool(self, isbn):
        data = self.get_data(isbn)
-        isbns = frozenset([x.get('isbn') for x in data if 'isbn' in x])
+        raw = tuple(x.get('isbn') for x in data if 'isbn' in x)
+        isbns = []
+        for x in raw:
+            isbns += x
+        isbns = frozenset(isbns)
        min_year = 100000
        for x in data:
            try:
--- a/src/calibre/ebooks/mobi/debug.py
+++ b/src/calibre/ebooks/mobi/debug.py
@ -0,0 +1,408 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import struct, datetime
+from calibre.utils.date import utc_tz
+from calibre.ebooks.mobi.langcodes import main_language, sub_language
+
+class PalmDOCAttributes(object):
+    '''
+    Decodes the 2 byte attributes bit field of a PalmDB header.
+
+    :param raw: The two raw bytes of the attributes field.
+
+    After construction, ``val`` holds the integer value of the field and
+    ``attributes`` holds one :class:`Attr` per known flag, in a fixed order.
+    '''
+
+    class Attr(object):
+        ''' A single named flag; ``val`` is non-zero when the flag is set. '''
+
+        def __init__(self, name, field, val):
+            self.name = name
+            # Keep only the bit(s) selected by the mask
+            self.val = val & field
+
+        def __str__(self):
+            return '%s: %s'%(self.name, bool(self.val))
+
+    def __init__(self, raw):
+        # All other PalmDB header fields are read (and written) big-endian,
+        # so the attributes field is decoded the same way.
+        self.val = struct.unpack(b'>H', raw)[0]
+        self.attributes = []
+        # Each mask must be a distinct single bit, otherwise one flag being
+        # set makes unrelated flags report as set as well.
+        for name, field in [('Read Only', 0x02), ('Dirty AppInfoArea', 0x04),
+                ('Backup this database', 0x08),
+                ('Okay to install newer over existing copy, if present on PalmPilot', 0x10),
+                ('Force the PalmPilot to reset after this database is installed', 0x20),
+                ('Don\'t allow copy of file to be beamed to other Pilot',
+                    0x40)]:
+            self.attributes.append(PalmDOCAttributes.Attr(name, field,
+                self.val))
+
+    def __str__(self):
+        attrs = '\n\t'.join([str(x) for x in self.attributes])
+        return 'PalmDOC Attributes: %s\n\t%s'%(bin(self.val), attrs)
+
+class PalmDB(object):
+    '''
+    Parses the fixed size PalmDB header found in the first 78 bytes of a
+    MOBI file.
+
+    :param raw: The raw header bytes (offsets 0 to 78 are read).
+
+    Raises :class:`ValueError` for Topaz files and for any type/creator
+    pair other than ``BOOKMOBI`` or ``TEXTREAD``.
+    '''
+
+    def __init__(self, raw):
+        self.raw = raw
+
+        if raw.startswith(b'TPZ'):
+            raise ValueError('This is a Topaz file')
+
+        def u32(start):
+            # Big-endian unsigned 32 bit integer at the given offset
+            return struct.unpack(b'>I', raw[start:start+4])[0]
+
+        self.name     = raw[:32].replace(b'\x00', b'')
+        self.attributes = PalmDOCAttributes(raw[32:34])
+        self.version = struct.unpack(b'>H', raw[34:36])[0]
+
+        # Dates are stored as a number of seconds counted from this epoch
+        epoch = datetime.datetime(1904, 1, 1, tzinfo=utc_tz)
+        delta = datetime.timedelta
+
+        self.creation_date_raw = u32(36)
+        self.creation_date = epoch + delta(seconds=self.creation_date_raw)
+        self.modification_date_raw = u32(40)
+        self.modification_date = epoch + delta(
+                seconds=self.modification_date_raw)
+        self.last_backup_date_raw = u32(44)
+        self.last_backup_date = epoch + delta(
+                seconds=self.last_backup_date_raw)
+        self.modification_number = u32(48)
+        self.app_info_id = raw[52:56]
+        self.sort_info_id = raw[56:60]
+        self.type = raw[60:64]
+        self.creator = raw[64:68]
+        self.ident = self.type + self.creator
+        if self.ident not in (b'BOOKMOBI', b'TEXTREAD'):
+            raise ValueError('Unknown book ident: %r'%self.ident)
+        self.uid_seed = raw[68:72]
+        self.next_rec_list_id = raw[72:76]
+
+        self.number_of_records = struct.unpack(b'>H', raw[76:78])[0]
+
+    def __str__(self):
+        ''' Human readable dump of all parsed header fields, one per line. '''
+        lines = [
+            '*'*20 + ' PalmDB Header '+ '*'*20,
+            'Name: %r'%self.name,
+            str(self.attributes),
+            'Version: %s'%self.version,
+            'Creation date: %s (%s)'%(self.creation_date.isoformat(),
+                self.creation_date_raw),
+            'Modification date: %s (%s)'%(self.modification_date.isoformat(),
+                self.modification_date_raw),
+            'Backup date: %s (%s)'%(self.last_backup_date.isoformat(),
+                self.last_backup_date_raw),
+            'Modification number: %s'%self.modification_number,
+            'App Info ID: %r'%self.app_info_id,
+            'Sort Info ID: %r'%self.sort_info_id,
+            'Type: %r'%self.type,
+            'Creator: %r'%self.creator,
+            'UID seed: %r'%self.uid_seed,
+            'Next record list id: %r'%self.next_rec_list_id,
+            'Number of records: %s'%self.number_of_records,
+        ]
+
+        return '\n'.join(lines)
+
+class Record(object):
+    '''
+    A single record of the file: its raw bytes together with the
+    ``(offset, flags, uid)`` triple taken from the record header table.
+    '''
+
+    def __init__(self, raw, header):
+        offset, flags, uid = header
+        self.offset = offset
+        self.flags = flags
+        self.uid = uid
+        self.raw = raw
+
+    @property
+    def header(self):
+        ''' One line textual summary of the record header values. '''
+        fields = (self.offset, self.flags, self.uid)
+        return 'Offset: %d Flags: %d UID: %d' % fields
+
+class EXTHRecord(object):
+    '''
+    One record from the EXTH header.
+
+    :param type_: Numeric record type.
+    :param data: Raw payload bytes of the record.
+
+    ``name`` is the descriptive name for known types and ``repr(type_)``
+    otherwise. For the integer valued records listed in the constructor,
+    ``data`` is replaced by the decoded big-endian unsigned 32 bit value.
+    '''
+
+    def __init__(self, type_, data):
+        self.type = type_
+        self.data = data
+        known_names = {
+                1 : 'DRM Server id',
+                2 : 'DRM Commerce id',
+                3 : 'DRM ebookbase book id',
+                100 : 'author',
+                101 : 'publisher',
+                102 : 'imprint',
+                103 : 'description',
+                104 : 'isbn',
+                105 : 'subject',
+                106 : 'publishingdate',
+                107 : 'review',
+                108 : 'contributor',
+                109 : 'rights',
+                110 : 'subjectcode',
+                111 : 'type',
+                112 : 'source',
+                113 : 'asin',
+                114 : 'versionnumber',
+                115 : 'sample',
+                116 : 'startreading',
+                117 : 'adult',
+                118 : 'retailprice',
+                119 : 'retailpricecurrency',
+                201 : 'coveroffset',
+                202 : 'thumboffset',
+                203 : 'hasfakecover',
+                204 : 'Creator Software',
+                205 : 'Creator Major Version', # '>I'
+                206 : 'Creator Minor Version', # '>I'
+                207 : 'Creator Build Number', # '>I'
+                208 : 'watermark',
+                209 : 'tamper_proof_keys',
+                300 : 'fontsignature',
+                301 : 'clippinglimit', # percentage '>B'
+                402 : 'publisherlimit',
+                404 : 'TTS flag', # '>B' 1 - TTS disabled 0 - TTS enabled
+                501 : 'cdetype', # 4 chars (PDOC or EBOK)
+                502 : 'lastupdatetime',
+                503 : 'updatedtitle',
+        }
+        self.name = known_names.get(type_, repr(type_))
+
+        # Records whose payload is a single big-endian unsigned 32 bit int
+        integer_records = ('coveroffset', 'thumboffset', 'hasfakecover',
+                'Creator Major Version', 'Creator Minor Version',
+                'Creator Build Number', 'Creator Software', 'startreading')
+        if self.name in integer_records:
+            self.data = struct.unpack(b'>I', data)[0]
+
+    def __str__(self):
+        fields = (self.name, self.type, self.data)
+        return '%s (%d): %r' % fields
+
+class EXTHHeader(object):
+    '''
+    Parses an EXTH header and all the records it declares.
+
+    :param raw: Bytes starting at the ``EXTH`` signature.
+
+    Raises :class:`ValueError` if ``raw`` does not start with ``EXTH``.
+    '''
+
+    def __init__(self, raw):
+        self.raw = raw
+        if not raw.startswith(b'EXTH'):
+            raise ValueError('EXTH header does not start with EXTH')
+        self.length = struct.unpack(b'>I', raw[4:8])[0]
+        self.count  = struct.unpack(b'>I', raw[8:12])[0]
+
+        self.records = []
+        # Records follow the 12 byte signature/length/count preamble
+        cursor, remaining = 12, self.count
+        while remaining > 0:
+            cursor = self.read_record(cursor)
+            remaining -= 1
+
+    def read_record(self, pos):
+        '''
+        Parse the record starting at ``pos``, append it to ``self.records``
+        and return the position immediately after it.
+        '''
+        # The stored length includes the 8 bytes of type and length fields
+        rtype, rlen = struct.unpack(b'>II', self.raw[pos:pos+8])
+        end = pos + rlen
+        payload = self.raw[(pos+8):end]
+        self.records.append(EXTHRecord(rtype, payload))
+        return end
+
+    def __str__(self):
+        lines = ['*'*20 + ' EXTH Header '+ '*'*20,
+                'EXTH header length: %d'%self.length,
+                'Number of EXTH records: %d'%self.count,
+                'EXTH records...']
+        lines.extend(str(rec) for rec in self.records)
+        return '\n'.join(lines)
+
+
+class MOBIHeader(object):
+    '''
+    Parses record 0 of a MOBI file: the 16 byte PalmDOC header, the MOBI
+    header that follows it and, when flagged as present, the EXTH header.
+
+    :param record0: Object whose ``raw`` attribute holds the bytes of the
+                    first record.
+
+    Raises :class:`ValueError` if the bytes at offset 16 are not ``MOBI``.
+
+    The DRM fields and the extra data/FCIS/FLIS fields are only set when
+    ``has_drm_data`` and ``has_extra_data_flags`` respectively are True.
+    The EXTH related attributes are only set when ``has_exth`` is True.
+    '''
+
+    def __init__(self, record0):
+        self.raw = record0.raw
+
+        # PalmDOC header (offsets 0-16)
+        self.compression_raw = self.raw[:2]
+        self.compression = {1: 'No compression', 2: 'PalmDoc compression',
+                17480: 'HUFF/CDIC compression'}.get(struct.unpack(b'>H',
+                    self.compression_raw)[0],
+                    repr(self.compression_raw))
+        self.unused = self.raw[2:4]
+        self.text_length, = struct.unpack(b'>I', self.raw[4:8])
+        self.number_of_text_records, self.text_record_size = \
+                struct.unpack(b'>HH', self.raw[8:12])
+        self.encryption_type_raw, = struct.unpack(b'>H', self.raw[12:14])
+        self.encryption_type = {0: 'No encryption',
+                1: 'Old mobipocket encryption',
+                2:'Mobipocket encryption'}.get(self.encryption_type_raw,
+                repr(self.encryption_type_raw))
+        self.unknown = self.raw[14:16]
+
+        # MOBI header proper starts at offset 16
+        self.identifier = self.raw[16:20]
+        if self.identifier != b'MOBI':
+            raise ValueError('Identifier %r unknown'%self.identifier)
+
+        self.length, = struct.unpack(b'>I', self.raw[20:24])
+        self.type_raw, = struct.unpack(b'>I', self.raw[24:28])
+        self.type = {
+                2 : 'Mobipocket book',
+                3 : 'PalmDOC book',
+                4 : 'Audio',
+                257 : 'News',
+                258 : 'News Feed',
+                259 : 'News magazine',
+                513 : 'PICS',
+                514 : 'Word',
+                515 : 'XLS',
+                516 : 'PPT',
+                517 : 'TEXT',
+                518 : 'HTML',
+            }.get(self.type_raw, repr(self.type_raw))
+
+        self.encoding_raw, = struct.unpack(b'>I', self.raw[28:32])
+        self.encoding = {
+                1252 : 'cp1252',
+                65001: 'utf-8',
+            }.get(self.encoding_raw, repr(self.encoding_raw))
+        self.uid = self.raw[32:36]
+        # Unpack to a plain integer, like every other numeric field
+        self.file_version, = struct.unpack(b'>I', self.raw[36:40])
+        self.reserved = self.raw[40:48]
+        self.secondary_index_record, = struct.unpack(b'>I', self.raw[48:52])
+        self.reserved2 = self.raw[52:80]
+        self.first_non_book_record, = struct.unpack(b'>I', self.raw[80:84])
+        self.fullname_offset, = struct.unpack(b'>I', self.raw[84:88])
+        self.fullname_length, = struct.unpack(b'>I', self.raw[88:92])
+        self.locale_raw, = struct.unpack(b'>I', self.raw[92:96])
+        # The language id is taken from the low byte of the locale and the
+        # sub language id from the 8 bits starting at bit 10
+        langcode = self.locale_raw
+        langid    = langcode & 0xFF
+        sublangid = (langcode >> 10) & 0xFF
+        self.language = main_language.get(langid, 'ENGLISH')
+        self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
+
+        self.input_language = self.raw[96:100]
+        # NOTE: the attribute name is misspelled, it is kept as is so that
+        # existing users of this attribute keep working
+        self.output_langauage = self.raw[100:104]
+        self.min_version, = struct.unpack(b'>I', self.raw[104:108])
+        self.first_image_index, = struct.unpack(b'>I', self.raw[108:112])
+        self.huffman_record_offset, = struct.unpack(b'>I', self.raw[112:116])
+        self.huffman_record_count, = struct.unpack(b'>I', self.raw[116:120])
+        self.unknown2 = self.raw[120:128]
+        self.exth_flags, = struct.unpack(b'>I', self.raw[128:132])
+        self.has_exth = bool(self.exth_flags & 0x40)
+        # The optional sections below are only parsed when both the declared
+        # header length and the actual amount of data are large enough
+        self.has_drm_data = self.length >= 174 and len(self.raw) >= 180
+        if self.has_drm_data:
+            self.unknown3 = self.raw[132:164]
+            self.drm_offset, = struct.unpack(b'>I', self.raw[164:168])
+            self.drm_count, = struct.unpack(b'>I', self.raw[168:172])
+            self.drm_size, = struct.unpack(b'>I', self.raw[172:176])
+            self.drm_flags = bin(struct.unpack(b'>I', self.raw[176:180])[0])
+        self.has_extra_data_flags = self.length >= 232 and len(self.raw) >= 232+16
+        self.has_fcis_flis = False
+        if self.has_extra_data_flags:
+            self.unknown4 = self.raw[180:192]
+            self.first_content_record, self.last_content_record = \
+                    struct.unpack(b'>HH', self.raw[192:196])
+            self.unknown5, = struct.unpack(b'>I', self.raw[196:200])
+            (self.fcis_number, self.fcis_count, self.flis_number,
+                    self.flis_count) = struct.unpack(b'>IIII',
+                            self.raw[200:216])
+            self.unknown6 = self.raw[216:240]
+            self.extra_data_flags = bin(struct.unpack(b'>I',
+                self.raw[240:244])[0])
+            self.primary_index_record, = struct.unpack(b'>I',
+                    self.raw[244:248])
+
+        if self.has_exth:
+            # The EXTH header follows the MOBI header, which itself starts
+            # after the 16 byte PalmDOC header
+            self.exth_offset = 16 + self.length
+
+            self.exth = EXTHHeader(self.raw[self.exth_offset:])
+
+            self.end_of_exth = self.exth_offset + self.exth.length
+            self.bytes_after_exth = self.fullname_offset - self.end_of_exth
+
+    def __str__(self):
+        ''' Human readable dump of the parsed header fields. '''
+        ans = ['*'*20 + ' MOBI Header '+ '*'*20]
+        ans.append('Compression: %s'%self.compression)
+        ans.append('Unused: %r'%self.unused)
+        ans.append('Number of text records: %d'%self.number_of_text_records)
+        ans.append('Text record size: %d'%self.text_record_size)
+        ans.append('Encryption: %s'%self.encryption_type)
+        ans.append('Unknown: %r'%self.unknown)
+        ans.append('Identifier: %r'%self.identifier)
+        ans.append('Header length: %d'% self.length)
+        ans.append('Type: %s'%self.type)
+        ans.append('Encoding: %s'%self.encoding)
+        ans.append('UID: %r'%self.uid)
+        ans.append('File version: %d'%self.file_version)
+        ans.append('Reserved: %r'%self.reserved)
+        ans.append('Secondary index record: %d (null val: %d)'%(
+            self.secondary_index_record, 0xffffffff))
+        ans.append('Reserved2: %r'%self.reserved2)
+        ans.append('First non-book record: %d'% self.first_non_book_record)
+        ans.append('Full name offset: %d'%self.fullname_offset)
+        ans.append('Full name length: %d bytes'%self.fullname_length)
+        ans.append('Langcode: %r'%self.locale_raw)
+        ans.append('Language: %s'%self.language)
+        ans.append('Sub language: %s'%self.sublanguage)
+        ans.append('Input language: %r'%self.input_language)
+        ans.append('Output language: %r'%self.output_langauage)
+        ans.append('Min version: %d'%self.min_version)
+        ans.append('First Image index: %d'%self.first_image_index)
+        ans.append('Huffman record offset: %d'%self.huffman_record_offset)
+        ans.append('Huffman record count: %d'%self.huffman_record_count)
+        ans.append('Unknown2: %r'%self.unknown2)
+        ans.append('EXTH flags: %r (%s)'%(self.exth_flags, self.has_exth))
+        if self.has_drm_data:
+            ans.append('Unknown3: %r'%self.unknown3)
+            ans.append('DRM Offset: %s'%self.drm_offset)
+            ans.append('DRM Count: %s'%self.drm_count)
+            ans.append('DRM Size: %s'%self.drm_size)
+            ans.append('DRM Flags: %r'%self.drm_flags)
+        if self.has_extra_data_flags:
+            ans.append('Unknown4: %r'%self.unknown4)
+            ans.append('First content record: %d'% self.first_content_record)
+            ans.append('Last content record: %d'% self.last_content_record)
+            ans.append('Unknown5: %d'% self.unknown5)
+            ans.append('FCIS number: %d'% self.fcis_number)
+            ans.append('FCIS count: %d'% self.fcis_count)
+            ans.append('FLIS number: %d'% self.flis_number)
+            ans.append('FLIS count: %d'% self.flis_count)
+            ans.append('Unknown6: %r'% self.unknown6)
+            ans.append('Extra data flags: %r'%self.extra_data_flags)
+            ans.append('Primary index record: %d'%self.primary_index_record)
+
+        ans = '\n'.join(ans)
+
+        if self.has_exth:
+            ans += '\n\n' + str(self.exth)
+            ans += '\n\nBytes after EXTH: %d'%self.bytes_after_exth
+
+        ans += '\nNumber of bytes after full name: %d' % (len(self.raw) - (self.fullname_offset +
+                self.fullname_length))
+
+        ans += '\nRecord 0 length: %d'%len(self.raw)
+        return ans
+
+class MOBIFile(object):
+    '''
+    Reads a complete MOBI file from a stream and splits it into the PalmDB
+    header, the individual records and the parsed MOBI header of record 0.
+
+    :param stream: File like object, it is read fully in the constructor.
+    '''
+
+    def __init__(self, stream):
+        self.raw = stream.read()
+
+        self.palmdb = PalmDB(self.raw[:78])
+        nrecords = self.palmdb.number_of_records
+
+        # The record header table follows the 78 byte PalmDB header, with
+        # 8 bytes per record: a 4 byte offset, 1 byte of flags and a 3 byte
+        # value
+        self.record_headers = []
+        self.records = []
+        for idx in xrange(nrecords):
+            start = 78 + 8 * idx
+            offset, flags, hi, mid, lo = struct.unpack(b'>LBBBB',
+                    self.raw[start:start+8])
+            uid = (hi << 16) | (mid << 8) | lo
+            self.record_headers.append((offset, flags, uid))
+
+        for idx, header in enumerate(self.record_headers):
+            # A record extends to the start of the next one, the last
+            # record extends to the end of the file
+            if idx == nrecords - 1:
+                end = len(self.raw)
+            else:
+                end = self.record_headers[idx + 1][0]
+            self.records.append(Record(self.raw[header[0]:end], header))
+
+        self.mobi_header = MOBIHeader(self.records[0])
+
+
+    def print_header(self):
+        ''' Print the PalmDB header, the record headers and the MOBI header. '''
+        print (str(self.palmdb).encode('utf-8'))
+        print ()
+        print ('Record headers:')
+        for num, rec in enumerate(self.records):
+            print ('%6d. %s'%(num, rec.header))
+
+        print ()
+        print (str(self.mobi_header).encode('utf-8'))
+
+def inspect_mobi(path_or_stream):
+    '''
+    Print the headers of a MOBI file.
+
+    :param path_or_stream: Either an object with a ``read`` method or the
+                           path to a file, which is opened in binary mode.
+    '''
+    opened_here = not hasattr(path_or_stream, 'read')
+    stream = open(path_or_stream, 'rb') if opened_here else path_or_stream
+    try:
+        # MOBIFile reads the whole stream in its constructor, so the file
+        # can be closed as soon as it has been created
+        f = MOBIFile(stream)
+    finally:
+        # Only close files opened here, never a stream owned by the caller
+        if opened_here:
+            stream.close()
+    f.print_header()
+
+if __name__ == '__main__':
+    # Usage: debug.py /path/to/file.mobi
+    import sys
+    # MOBIFile reads the whole file in its constructor, so the file can be
+    # closed before the headers are printed
+    with open(sys.argv[1], 'rb') as stream:
+        f = MOBIFile(stream)
+    f.print_header()
+
--- a/src/calibre/ebooks/mobi/mobiml.py
+++ b/src/calibre/ebooks/mobi/mobiml.py
@ -463,9 +463,9 @@ class MobiMLizer(object):
                text = COLLAPSE.sub(' ', elem.text)
        valign = style['vertical-align']
        not_baseline = valign in ('super', 'sub', 'text-top',
-                'text-bottom') or (
+                'text-bottom', 'top', 'bottom') or (
                isinstance(valign, (float, int)) and abs(valign) != 0)
-        issup = valign in ('super', 'text-top') or (
+        issup = valign in ('super', 'text-top', 'top') or (
            isinstance(valign, (float, int)) and valign > 0)
        vtag = 'sup' if issup else 'sub'
        if not_baseline and not ignore_valign and tag not in NOT_VTAGS and not isblock:
@ -484,6 +484,7 @@ class MobiMLizer(object):
            parent = bstate.para if bstate.inline is None else bstate.inline
            if parent is not None:
                vtag = etree.SubElement(parent, XHTML(vtag))
+                vtag = etree.SubElement(vtag, XHTML('small'))
                # Add anchors
                for child in vbstate.body:
                    if child is not vbstate.para:
@ -495,6 +496,10 @@ class MobiMLizer(object):
                        vtag.append(child)
                return

+        if tag == 'blockquote':
+            old_mim = self.opts.mobi_ignore_margins
+            self.opts.mobi_ignore_margins = False
+
        if text or tag in CONTENT_TAGS or tag in NESTABLE_TAGS:
            self.mobimlize_content(tag, text, bstate, istates)
        for child in elem:
@ -510,6 +515,8 @@ class MobiMLizer(object):
            if tail:
                self.mobimlize_content(tag, tail, bstate, istates)

+        if tag == 'blockquote':
+            self.opts.mobi_ignore_margins = old_mim

        if bstate.content and style['page-break-after'] in PAGE_BREAKS:
            bstate.pbreak = True
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -7,8 +7,6 @@ __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.cam> and \
        Kovid Goyal <kovid@kovidgoyal.net>'

 from collections import defaultdict
-from itertools import count
-from itertools import izip
 import random
 import re
 from struct import pack
@ -282,8 +280,8 @@ class Serializer(object):
                buffer.write('="')
                self.serialize_text(val, quot=True)
                buffer.write('"')
-        if elem.text or len(elem) > 0:
        buffer.write('>')
+        if elem.text or len(elem) > 0:
            if elem.text:
                self.anchor_offset = None
                self.serialize_text(elem.text)
@ -293,8 +291,6 @@ class Serializer(object):
                    self.anchor_offset = None
                    self.serialize_text(child.tail)
        buffer.write('</%s>' % tag)
-        else:
-            buffer.write('/>')

    def serialize_text(self, text, quot=False):
        text = text.replace('&', '&amp;')
@ -312,6 +308,7 @@ class Serializer(object):
            if href not in id_offsets:
                self.logger.warn('Hyperlink target %r not found' % href)
                href, _ = urldefrag(href)
+            if href in self.id_offsets:
                ioff = self.id_offsets[href]
                for hoff in hoffs:
                    buffer.seek(hoff)
@ -1512,7 +1509,7 @@ class MobiWriter(object):
        record0.write(exth)
        record0.write(title)
        record0 = record0.getvalue()
-        self._records[0] = record0 + ('\0' * (2452 - len(record0)))
+        self._records[0] = record0 + ('\0' * (1024*8))

    def _build_exth(self):
        oeb = self._oeb
@ -1631,8 +1628,8 @@ class MobiWriter(object):
        self._write(title, pack('>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0),
            'BOOK', 'MOBI', pack('>IIH', nrecords, 0, nrecords))
        offset = self._tell() + (8 * nrecords) + 2
-        for id, record in izip(count(), self._records):
-            self._write(pack('>I', offset), '\0', pack('>I', id)[1:])
+        for i, record in enumerate(self._records):
+            self._write(pack('>I', offset), '\0', pack('>I', 2*i)[1:])
            offset += len(record)
        self._write('\0\0')

--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -17,6 +17,8 @@ from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
 from cssutils import profile as cssprofiles
 from lxml import etree
 from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
+
+from calibre import force_unicode
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
 from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
 from calibre.ebooks.oeb.profile import PROFILES
@ -95,6 +97,10 @@ class CSSSelector(etree.XPath):

    def __init__(self, css, namespaces=XPNSMAP):
        css = self.MIN_SPACE_RE.sub(r'\1', css)
+        if isinstance(css, unicode):
+            # Workaround for bug in lxml on windows/OS X that causes a massive
+            # memory leak with non ASCII selectors
+            css = css.encode('ascii', 'ignore').decode('ascii')
        try:
            path = css_to_xpath(css)
        except UnicodeEncodeError: # Bug in css_to_xpath
@ -140,8 +146,17 @@ class Stylizer(object):
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
-            if elem.tag == XHTML('style') and elem.text \
-               and elem.get('type', CSS_MIME) in OEB_STYLES:
+            if (elem.tag == XHTML('style') and
+                elem.get('type', CSS_MIME) in OEB_STYLES):
+                text = elem.text if elem.text else u''
+                for x in elem:
+                    t = getattr(x, 'text', None)
+                    if t:
+                        text += u'\n\n' + force_unicode(t, u'utf-8')
+                    t = getattr(x, 'tail', None)
+                    if t:
+                        text += u'\n\n' + force_unicode(t, u'utf-8')
+                if text:
                    text = XHTML_CSS_NAMESPACE + elem.text
                    text = oeb.css_preprocessor(text)
                    stylesheet = parser.parseString(text, href=cssname)
--- a/src/calibre/ebooks/oeb/transforms/page_margin.py
+++ b/src/calibre/ebooks/oeb/transforms/page_margin.py
@ -20,8 +20,9 @@ class RemoveAdobeMargins(object):
        self.oeb, self.opts, self.log = oeb, opts, log

        for item in self.oeb.manifest:
-            if item.media_type in ('application/vnd.adobe-page-template+xml',
-                    'application/vnd.adobe.page-template+xml'):
+            if (item.media_type in ('application/vnd.adobe-page-template+xml',
+                    'application/vnd.adobe.page-template+xml') and
+                    hasattr(item.data, 'xpath')):
                self.log('Removing page margins specified in the'
                        ' Adobe page template')
                for elem in item.data.xpath(
--- a/src/calibre/ebooks/pdf/fonts.cpp
+++ b/src/calibre/ebooks/pdf/fonts.cpp
@ -72,6 +72,7 @@ XMLFont::XMLFont(string* font_name, double size, GfxRGB rgb) :
        size(size-1), line_size(-1.0), italic(false), bold(false), font_name(font_name),
        font_family(NULL), color(rgb)  {

+
    if (!this->font_name) this->font_name = new string(DEFAULT_FONT_FAMILY);
    this->font_family = family_name(this->font_name);
    if (strcasestr(font_name->c_str(), "bold")) this->bold = true;
@ -134,7 +135,12 @@ Fonts::size_type Fonts::add_font(XMLFont *f) {
 }

 Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) {
-    XMLFont *f = new XMLFont(font_name, size, rgb);
+    XMLFont *f = NULL;
+    if (font_name == NULL) 
+        font_name = new string("Unknown");
+        // font_name must not be deleted
+    f = new XMLFont(font_name, size, rgb);
+
    return this->add_font(f);
 }

--- a/src/calibre/ebooks/pdf/pdftohtml.py
+++ b/src/calibre/ebooks/pdf/pdftohtml.py
@ -13,7 +13,7 @@ from functools import partial

 from calibre.ebooks import ConversionError, DRMError
 from calibre.ptempfile import PersistentTemporaryFile
-from calibre import isosx, iswindows, islinux, isfreebsd
+from calibre.constants import isosx, iswindows, islinux, isfreebsd
 from calibre import CurrentDir

 PDFTOHTML = 'pdftohtml'
@ -43,6 +43,8 @@ def pdftohtml(output_dir, pdf_path, no_images):
        # This is neccessary as pdftohtml doesn't always (linux) respect absolute paths
        pdf_path = os.path.abspath(pdf_path)
        cmd = [PDFTOHTML, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q', pdf_path, os.path.basename(index)]
+        if isfreebsd:
+            cmd.remove('-nodrm')
        if no_images:
            cmd.append('-i')

--- a/src/calibre/ebooks/pml/pmlconverter.py
+++ b/src/calibre/ebooks/pml/pmlconverter.py
@ -11,6 +11,7 @@ __docformat__ = 'restructuredtext en'
 import os
 import re
 import StringIO
+from copy import deepcopy

 from calibre import my_unichr, prepare_string_for_xml
 from calibre.ebooks.metadata.toc import TOC
@ -25,6 +26,7 @@ class PML_HTMLizer(object):
        'sp',
        'sb',
        'h1',
+        'h1c',
        'h2',
        'h3',
        'h4',
@ -58,6 +60,7 @@ class PML_HTMLizer(object):

    STATES_TAGS = {
        'h1': ('<h1 style="page-break-before: always;">', '</h1>'),
+        'h1c': ('<h1>', '</h1>'),
        'h2': ('<h2>', '</h2>'),
        'h3': ('<h3>', '</h3>'),
        'h4': ('<h4>', '</h4>'),
@ -141,6 +144,10 @@ class PML_HTMLizer(object):
        'b',
    ]
    
+    NEW_LINE_EXCHANGE_STATES = {
+        'h1': 'h1c',
+    }
+
    def __init__(self):
        self.state = {}
        # toc consists of a tuple
@ -219,11 +226,17 @@ class PML_HTMLizer(object):
    def start_line(self):
        start = u''

+        state = deepcopy(self.state)
        div = []
        span = []
        other = []
        
-        for key, val in self.state.items():
+        for key, val in state.items():
+            if key in self.NEW_LINE_EXCHANGE_STATES and val[0]:
+                state[self.NEW_LINE_EXCHANGE_STATES[key]] = val
+                state[key] = [False, '']
+
+        for key, val in state.items():
            if val[0]:
                if key in self.DIV_STATES:
                    div.append((key, val[1]))
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@ -37,7 +37,7 @@ class MarkdownMLizer(object):
            if not self.opts.keep_links:
                html = re.sub(r'<\s*/*\s*a[^>]*>', '', html)
            if not self.opts.keep_image_references:
-                html = re.sub(r'<\s*img[^>]*>', '', html)\
+                html = re.sub(r'<\s*img[^>]*>', '', html)
            
            text = html2text(html)
        
--- a/src/calibre/gui2/init.py
+++ b/src/calibre/gui2/init.py
@ -13,7 +13,7 @@ from PyQt4.Qt import QVariant, QFileInfo, QObject, SIGNAL, QBuffer, Qt, \

 ORG_NAME = 'KovidsBrain'
 APP_UID  = 'libprs500'
-from calibre.constants import islinux, iswindows, isfreebsd, isfrozen
+from calibre.constants import islinux, iswindows, isfreebsd, isfrozen, isosx
 from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
 from calibre.utils.localization import set_qt_translator
 from calibre.ebooks.metadata.meta import get_metadata, metadata_from_formats
@ -23,14 +23,36 @@ from calibre.utils.date import UNDEFINED_DATE
 # Setup gprefs {{{
 gprefs = JSONConfig('gui')

+if isosx:
+    gprefs.defaults['action-layout-menubar'] = (
+        'Add Books', 'Edit Metadata', 'Convert Books',
+        'Choose Library', 'Save To Disk', 'Preferences',
+        'Help',
+        )
+    gprefs.defaults['action-layout-menubar-device'] = (
+        'Add Books', 'Edit Metadata', 'Convert Books',
+        'Location Manager', 'Send To Device',
+        'Save To Disk', 'Preferences', 'Help',
+        )
+    gprefs.defaults['action-layout-toolbar'] = (
+        'Add Books', 'Edit Metadata', None, 'Convert Books', 'View', None,
+        'Choose Library', 'Donate', None, 'Fetch News', 'Save To Disk',
+        'Connect Share', None, 'Remove Books',
+        )
+    gprefs.defaults['action-layout-toolbar-device'] = (
+        'Add Books', 'Edit Metadata', None, 'Convert Books', 'View',
+        'Send To Device', None, None, 'Location Manager', None, None,
+        'Fetch News', 'Save To Disk', 'Connect Share', None,
+        'Remove Books',
+        )
+else:
+    gprefs.defaults['action-layout-menubar'] = ()
+    gprefs.defaults['action-layout-menubar-device'] = ()
    gprefs.defaults['action-layout-toolbar'] = (
        'Add Books', 'Edit Metadata', None, 'Convert Books', 'View', None,
        'Choose Library', 'Donate', None, 'Fetch News', 'Save To Disk',
        'Connect Share', None, 'Remove Books', None, 'Help', 'Preferences',
        )
-
-gprefs.defaults['action-layout-toolbar-child'] = ()
-
    gprefs.defaults['action-layout-toolbar-device'] = (
        'Add Books', 'Edit Metadata', None, 'Convert Books', 'View',
        'Send To Device', None, None, 'Location Manager', None, None,
@ -38,6 +60,8 @@ gprefs.defaults['action-layout-toolbar-device'] = (
        'Remove Books', None, 'Help', 'Preferences',
        )

+gprefs.defaults['action-layout-toolbar-child'] = ()
+
 gprefs.defaults['action-layout-context-menu'] = (
        'Edit Metadata', 'Send To Device', 'Save To Disk',
        'Connect Share', 'Copy To Library', None,
@ -57,6 +81,7 @@ gprefs.defaults['toolbar_text'] = 'auto'
 gprefs.defaults['font'] = None
 gprefs.defaults['tags_browser_partition_method'] = 'first letter'
 gprefs.defaults['tags_browser_collapse_at'] = 100
+gprefs.defaults['edit_metadata_single_layout'] = 'default'

 # }}}

@ -332,6 +357,7 @@ class FileIconProvider(QFileIconProvider):
             'bmp'     : 'bmp',
             'svg'     : 'svg',
             'html'    : 'html',
+             'htmlz'   : 'html',
             'htm'     : 'html',
             'xhtml'   : 'html',
             'xhtm'    : 'html',
--- a/src/calibre/gui2/actions/init.py
+++ b/src/calibre/gui2/actions/init.py
@ -75,7 +75,7 @@ class InterfaceAction(QObject):
    dont_remove_from = frozenset([])

    all_locations = frozenset(['toolbar', 'toolbar-device', 'context-menu',
-        'context-menu-device', 'toolbar-child'])
+        'context-menu-device', 'toolbar-child', 'menubar', 'menubar-device'])

    #: Type of action
    #: 'current' means acts on the current view
@ -145,11 +145,10 @@ class InterfaceAction(QObject):
                    ans[candidate] = zf.read(candidate)
        return ans

-
    def genesis(self):
        '''
        Setup this plugin. Only called once during initialization. self.gui is
-        available. The action secified by :attr:`action_spec` is available as
+        available. The action specified by :attr:`action_spec` is available as
        ``self.qaction``.
        '''
        pass
--- a/src/calibre/gui2/actions/add.py
+++ b/src/calibre/gui2/actions/add.py
@ -22,6 +22,8 @@ from calibre.constants import preferred_encoding, filesystem_encoding
 from calibre.gui2.actions import InterfaceAction
 from calibre.gui2 import config, question_dialog
 from calibre.ebooks.metadata import MetaInformation
+from calibre.utils.config import test_eight_code
+from calibre.ebooks.metadata.sources.base import msprefs

 def get_filters():
    return [
@ -178,6 +180,19 @@ class AddAction(InterfaceAction):
            except IndexError:
                self.gui.library_view.model().books_added(self.isbn_add_dialog.value)
                self.isbn_add_dialog.accept()
+                if test_eight_code:
+                    orig = msprefs['ignore_fields']
+                    new = list(orig)
+                    for x in ('title', 'authors'):
+                        if x in new:
+                            new.remove(x)
+                    msprefs['ignore_fields'] = new
+                    try:
+                        self.gui.iactions['Edit Metadata'].download_metadata(
+                            ids=self.add_by_isbn_ids)
+                    finally:
+                        msprefs['ignore_fields'] = orig
+                else:
                    orig = config['overwrite_author_title_metadata']
                    config['overwrite_author_title_metadata'] = True
                    try:
--- a/src/calibre/gui2/actions/add_to_library.py
+++ b/src/calibre/gui2/actions/add_to_library.py
@ -12,7 +12,7 @@ class AddToLibraryAction(InterfaceAction):
    name = 'Add To Library'
    action_spec = (_('Add books to library'), 'add_book.png',
            _('Add books to your calibre library from the connected device'), None)
-    dont_add_to = frozenset(['toolbar', 'context-menu', 'toolbar-child'])
+    dont_add_to = frozenset(['menubar', 'toolbar', 'context-menu', 'toolbar-child'])
    action_type = 'current'

    def genesis(self):
--- a/src/calibre/gui2/actions/annotate.py
+++ b/src/calibre/gui2/actions/annotate.py
@ -18,6 +18,7 @@ class FetchAnnotationsAction(InterfaceAction):

    name = 'Fetch Annotations'
    action_spec = (_('Fetch annotations (experimental)'), None, None, None)
+    dont_add_to = frozenset(['menubar', 'toolbar', 'context-menu', 'toolbar-child'])
    action_type = 'current'

    def genesis(self):
--- a/src/calibre/gui2/actions/catalog.py
+++ b/src/calibre/gui2/actions/catalog.py
@ -18,7 +18,7 @@ class GenerateCatalogAction(InterfaceAction):

    name = 'Generate Catalog'
    action_spec = (_('Create a catalog of the books in your calibre library'), None, None, None)
-    dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
+    dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])

    def generate_catalog(self):
        rows = self.gui.library_view.selectionModel().selectedRows()
--- a/src/calibre/gui2/actions/choose_library.py
+++ b/src/calibre/gui2/actions/choose_library.py
@ -8,7 +8,7 @@ __docformat__ = 'restructuredtext en'
 import os, shutil
 from functools import partial

-from PyQt4.Qt import QMenu, Qt, QInputDialog
+from PyQt4.Qt import QMenu, Qt, QInputDialog, QToolButton

 from calibre import isbytestring
 from calibre.constants import filesystem_encoding
@ -80,7 +80,7 @@ class ChooseLibraryAction(InterfaceAction):
    name = 'Choose Library'
    action_spec = (_('%d books'), 'lt.png',
            _('Choose calibre library to work with'), None)
-    dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
+    dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])

    def genesis(self):
        self.count_changed(0)
@ -88,6 +88,9 @@ class ChooseLibraryAction(InterfaceAction):
                type=Qt.QueuedConnection)

        self.stats = LibraryUsageStats()
+        self.popup_type = (QToolButton.InstantPopup if len(self.stats.stats) > 1 else
+                QToolButton.MenuButtonPopup)
+
        self.create_action(spec=(_('Switch/create library...'), 'lt.png', None,
            None), attr='action_choose')
        self.action_choose.triggered.connect(self.choose_library,
@ -123,6 +126,7 @@ class ChooseLibraryAction(InterfaceAction):
                    type=Qt.QueuedConnection)
            self.choose_menu.addAction(ac)

+
        self.rename_separator = self.choose_menu.addSeparator()

        self.maintenance_menu = QMenu(_('Library Maintenance'))
@ -172,6 +176,7 @@ class ChooseLibraryAction(InterfaceAction):
            return
        db = self.gui.library_view.model().db
        locations = list(self.stats.locations(db))
+
        for ac in self.switch_actions:
            ac.setVisible(False)
        self.quick_menu.clear()
@ -205,7 +210,6 @@ class ChooseLibraryAction(InterfaceAction):
                rename_actions, delete_actions, qs_actions,
                self.action_choose)

-
    def location_selected(self, loc):
        enabled = loc == 'library'
        self.qaction.setEnabled(enabled)
--- a/src/calibre/gui2/actions/convert.py
+++ b/src/calibre/gui2/actions/convert.py
@ -20,7 +20,7 @@ class ConvertAction(InterfaceAction):

    name = 'Convert Books'
    action_spec = (_('Convert books'), 'convert.png', None, _('C'))
-    dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
+    dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
    action_type = 'current'

    def genesis(self):
--- a/src/calibre/gui2/actions/device.py
+++ b/src/calibre/gui2/actions/device.py
@ -24,7 +24,7 @@ class ShareConnMenu(QMenu): # {{{

    config_email = pyqtSignal()
    toggle_server = pyqtSignal()
-    dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
+    dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])

    def __init__(self, parent=None):
        QMenu.__init__(self, parent)
@ -121,8 +121,7 @@ class SendToDeviceAction(InterfaceAction):

    name = 'Send To Device'
    action_spec = (_('Send to device'), 'sync.png', None, _('D'))
-    dont_remove_from = frozenset(['toolbar-device'])
-    dont_add_to = frozenset(['toolbar', 'context-menu', 'toolbar-child'])
+    dont_add_to = frozenset(['menubar', 'toolbar', 'context-menu', 'toolbar-child'])

    def genesis(self):
        self.qaction.triggered.connect(self.do_sync)
@ -166,6 +165,10 @@ class ConnectShareAction(InterfaceAction):

    def content_server_state_changed(self, running):
        self.share_conn_menu.server_state_changed(running)
+        if running:
+            self.qaction.setIcon(QIcon(I('connect_share_on.png')))
+        else:
+            self.qaction.setIcon(QIcon(I('connect_share.png')))

    def toggle_content_server(self):
        if self.gui.content_server is None:
--- a/src/calibre/gui2/actions/edit_collections.py
+++ b/src/calibre/gui2/actions/edit_collections.py
@ -12,7 +12,7 @@ class EditCollectionsAction(InterfaceAction):
    name = 'Edit Collections'
    action_spec = (_('Manage collections'), None,
            _('Manage the collections on this device'), None)
-    dont_add_to = frozenset(['toolbar', 'context-menu', 'toolbar-child'])
+    dont_add_to = frozenset(['menubar', 'toolbar', 'context-menu', 'toolbar-child'])
    action_type = 'current'

    def genesis(self):
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@ -10,13 +10,14 @@ from functools import partial

 from PyQt4.Qt import Qt, QMenu, QModelIndex

-from calibre.gui2 import error_dialog, config
+from calibre.gui2 import error_dialog, config, Dispatcher
 from calibre.gui2.dialogs.metadata_single import MetadataSingleDialog
 from calibre.gui2.dialogs.metadata_bulk import MetadataBulkDialog
 from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.gui2.dialogs.tag_list_editor import TagListEditor
 from calibre.gui2.actions import InterfaceAction
 from calibre.utils.icu import sort_key
+from calibre.utils.config import test_eight_code

 class EditMetadataAction(InterfaceAction):

@ -34,16 +35,23 @@ class EditMetadataAction(InterfaceAction):
        md.addAction(_('Edit metadata in bulk'),
                partial(self.edit_metadata, False, bulk=True))
        md.addSeparator()
-        md.addAction(_('Download metadata and covers'),
-                partial(self.download_metadata, False, covers=True),
+        if test_eight_code:
+            dall = self.download_metadata
+            dident = partial(self.download_metadata, covers=False)
+            dcovers = partial(self.download_metadata, identify=False)
+        else:
+            dall = partial(self.download_metadata_old, False, covers=True)
+            dident = partial(self.download_metadata_old, False, covers=False)
+            dcovers = partial(self.download_metadata_old, False, covers=True,
+                    set_metadata=False, set_social_metadata=False)
+
+        md.addAction(_('Download metadata and covers'), dall,
                Qt.ControlModifier+Qt.Key_D)
-        md.addAction(_('Download only metadata'),
-                partial(self.download_metadata, False, covers=False))
-        md.addAction(_('Download only covers'),
-                partial(self.download_metadata, False, covers=True,
-                    set_metadata=False, set_social_metadata=False))
+        md.addAction(_('Download only metadata'), dident)
+        md.addAction(_('Download only covers'), dcovers)
+        if not test_eight_code:
            md.addAction(_('Download only social metadata'),
-                partial(self.download_metadata, False, covers=False,
+                partial(self.download_metadata_old, False, covers=False,
                    set_metadata=False, set_social_metadata=True))
        self.metadata_menu = md

@ -72,7 +80,26 @@ class EditMetadataAction(InterfaceAction):
        self.qaction.setEnabled(enabled)
        self.action_merge.setEnabled(enabled)

-    def download_metadata(self, checked, covers=True, set_metadata=True,
+    def download_metadata(self, identify=True, covers=True, ids=None):
+        '''
+        Start a bulk download of metadata and/or covers.
+
+        :param identify: Forwarded to ``start_download``. The "Download only
+            covers" menu entry calls this method with ``identify=False``.
+        :param covers: Forwarded to ``start_download``. The "Download only
+            metadata" menu entry calls this method with ``covers=False``.
+        :param ids: Book ids to process. If None, the ids of the rows
+            currently selected in the library view are used; if nothing is
+            selected an error dialog is shown and its result is returned.
+        '''
+        if ids is None:
+            selected = self.gui.library_view.selectionModel().selectedRows()
+            if not selected:
+                return error_dialog(self.gui, _('Cannot download metadata'),
+                            _('No books selected'), show=True)
+            # Translate the selected view rows into database ids
+            to_id = self.gui.library_view.model().db.id
+            ids = [to_id(index.row()) for index in selected]
+        from calibre.gui2.metadata.bulk_download2 import start_download
+        on_finished = Dispatcher(self.bulk_metadata_downloaded)
+        start_download(self.gui, ids, on_finished, identify, covers)
+
+    def bulk_metadata_downloaded(self, job):
+        '''
+        Completion callback for the job started by :meth:`download_metadata`.
+
+        A failed job is reported through ``self.gui.job_exception``; otherwise
+        the job is handed to ``proceed`` from ``bulk_download2``.
+        '''
+        if not job.failed:
+            from calibre.gui2.metadata.bulk_download2 import proceed
+            proceed(self.gui, job)
+        else:
+            self.gui.job_exception(job, dialog_title=_('Failed to download metadata'))
+
+    def download_metadata_old(self, checked, covers=True, set_metadata=True,
            set_social_metadata=None):
        rows = self.gui.library_view.selectionModel().selectedRows()
        if not rows or len(rows) == 0:
@ -133,8 +160,6 @@ class EditMetadataAction(InterfaceAction):

        row_list = [r.row() for r in rows]
        current_row = 0
-        changed = set([])
-        db = self.gui.library_view.model().db

        if len(row_list) == 1:
            cr = row_list[0]
@ -142,6 +167,27 @@ class EditMetadataAction(InterfaceAction):
                list(range(self.gui.library_view.model().rowCount(QModelIndex())))
            current_row = row_list.index(cr)

+        func = (self.do_edit_metadata if test_eight_code else
+                    self.do_edit_metadata_old)
+        changed, rows_to_refresh = func(row_list, current_row)
+
+        m = self.gui.library_view.model()
+
+        if rows_to_refresh:
+            m.refresh_rows(rows_to_refresh)
+
+        if changed:
+            m.refresh_ids(list(changed))
+            current = self.gui.library_view.currentIndex()
+            if self.gui.cover_flow:
+                self.gui.cover_flow.dataChanged()
+            m.current_changed(current, previous)
+            self.gui.tags_view.recount()
+
+    def do_edit_metadata_old(self, row_list, current_row):
+        changed = set([])
+        db = self.gui.library_view.model().db
+
        while True:
            prev = next_ = None
            if current_row > 0:
@ -166,16 +212,30 @@ class EditMetadataAction(InterfaceAction):
            current_row += d.row_delta
            self.gui.library_view.set_current_row(current_row)
            self.gui.library_view.scroll_to_row(current_row)
+        return changed, set()

+    def do_edit_metadata(self, row_list, current_row):
+        '''
+        Edit metadata with the ``edit_metadata`` function from
+        ``calibre.gui2.metadata.single``.
+
+        :param row_list: Rows passed on to ``edit_metadata``
+        :param current_row: Passed on to ``edit_metadata`` unchanged
+        :return: The ``(changed, rows_to_refresh)`` pair produced by
+            ``edit_metadata``
+        '''
+        from calibre.gui2.metadata.single import edit_metadata
+        library_db = self.gui.library_view.model().db
+        outcome = edit_metadata(library_db, row_list, current_row,
+                parent=self.gui, view_slot=self.view_format_callback,
+                set_current_callback=self.set_current_callback)
+        changed, rows_to_refresh = outcome
+        return changed, rows_to_refresh
+
+    def set_current_callback(self, id_):
+        '''
+        Make the book with id ``id_`` the current row of the library view and
+        scroll the view to it.
+        '''
+        view = self.gui.library_view
+        row = view.model().db.row(id_)
+        view.set_current_row(row)
+        view.scroll_to_row(row)
+
+    def view_format_callback(self, id_, fmt):
+        '''
+        Open a format with the 'View' interface action.
+
+        :param id_: Book id, or None. With None, ``fmt`` is handed straight to
+            the View action's ``_view_file`` (presumably a file path in that
+            case -- confirm against the caller).
+        :param fmt: Format to view for the book with id ``id_``
+        '''
+        viewer = self.gui.iactions['View']
+        if id_ is not None:
+            # view_format() works on rows, so map the id back to its row
+            row = self.gui.library_view.model().db.row(id_)
+            viewer.view_format(row, fmt)
+        else:
+            viewer._view_file(fmt)

-        if changed:
-            self.gui.library_view.model().refresh_ids(list(changed))
-            current = self.gui.library_view.currentIndex()
-            m = self.gui.library_view.model()
-            if self.gui.cover_flow:
-                self.gui.cover_flow.dataChanged()
-            m.current_changed(current, previous)
-            self.gui.tags_view.recount()

    def edit_bulk_metadata(self, checked):
        '''
--- a/src/calibre/gui2/actions/next_match.py
+++ b/src/calibre/gui2/actions/next_match.py
@ -11,7 +11,7 @@ class NextMatchAction(InterfaceAction):
    name = 'Move to next highlighted book'
    action_spec = (_('Move to next match'), 'arrow-down.png',
            _('Move to next highlighted match'), [_('N'), _('F3')])
-    dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
+    dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
    action_type = 'current'

    def genesis(self):
--- a/src/calibre/gui2/actions/open.py
+++ b/src/calibre/gui2/actions/open.py
@ -13,7 +13,7 @@ class OpenFolderAction(InterfaceAction):
    name = 'Open Folder'
    action_spec = (_('Open containing folder'), 'document_open.png', None,
            _('O'))
-    dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
+    dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
    action_type = 'current'

    def genesis(self):
--- a/src/calibre/gui2/actions/preferences.py
+++ b/src/calibre/gui2/actions/preferences.py
@ -16,7 +16,6 @@ class PreferencesAction(InterfaceAction):

    name = 'Preferences'
    action_spec = (_('Preferences'), 'config.png', None, _('Ctrl+P'))
-    dont_remove_from = frozenset(['toolbar'])

    def genesis(self):
        pm = QMenu()
--- a/src/calibre/gui2/actions/show_book_details.py
+++ b/src/calibre/gui2/actions/show_book_details.py
@ -15,7 +15,7 @@ class ShowBookDetailsAction(InterfaceAction):
    name = 'Show Book Details'
    action_spec = (_('Show book details'), 'dialog_information.png', None,
            _('I'))
-    dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
+    dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
    action_type = 'current'

    def genesis(self):
--- a/src/calibre/gui2/actions/store.py
+++ b/src/calibre/gui2/actions/store.py
@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+from functools import partial
+
+from PyQt4.Qt import Qt, QMenu, QToolButton, QDialog, QVBoxLayout
+
+from calibre.gui2.actions import InterfaceAction
+
+class StoreAction(InterfaceAction):
+    '''
+    Interface action for the stores in ``self.gui.istores``. Its menu offers
+    a search entry followed by one entry per store.
+    '''
+
+    name = 'Store'
+    action_spec = (_('Store'), 'store.png', None, None)
+
+    def genesis(self):
+        '''Connect the action itself to :meth:`search` and create its menu.'''
+        self.qaction.triggered.connect(self.search)
+        self.store_menu = QMenu()
+        self.load_menu()
+
+    def load_menu(self):
+        '''Refill the menu from ``self.gui.istores`` and attach it to the action.'''
+        menu = self.store_menu
+        menu.clear()
+        menu.addAction(_('Search'), self.search)
+        menu.addSeparator()
+        for store_name, plugin in self.gui.istores.items():
+            menu.addAction(store_name, partial(self.open_store, plugin))
+        self.qaction.setMenu(menu)
+
+    def search(self):
+        '''Show the store search dialog via ``exec_()``.'''
+        from calibre.gui2.store.search import SearchDialog
+        dialog = SearchDialog(self.gui.istores, self.gui)
+        dialog.exec_()
+
+    def open_store(self, store_plugin):
+        '''Call ``store_plugin.open`` with ``self.gui``.'''
+        store_plugin.open(self.gui)
--- a/src/calibre/gui2/actions/tweak_epub.py
+++ b/src/calibre/gui2/actions/tweak_epub.py
@ -15,7 +15,7 @@ class TweakEpubAction(InterfaceAction):
    action_spec = (_('Tweak ePub'), 'trim.png',
            _('Make small changes to ePub format books'),
            _('T'))
-    dont_add_to = frozenset(['toolbar-device', 'context-menu-device'])
+    dont_add_to = frozenset(['menubar-device', 'toolbar-device', 'context-menu-device'])
    action_type = 'current'

    def genesis(self):
--- a/src/calibre/gui2/actions/view.py
+++ b/src/calibre/gui2/actions/view.py
@ -6,9 +6,8 @@ __copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'

 import os, time
-from functools import partial

-from PyQt4.Qt import Qt, QMenu
+from PyQt4.Qt import Qt, QMenu, QAction, pyqtSignal

 from calibre.constants import isosx
 from calibre.gui2 import error_dialog, Dispatcher, question_dialog, config, \
@ -18,6 +17,19 @@ from calibre.utils.config import prefs
 from calibre.ptempfile import PersistentTemporaryFile
 from calibre.gui2.actions import InterfaceAction

+class HistoryAction(QAction):
+    '''
+    Action for one recently viewed book. When triggered it emits
+    :attr:`view_historical` with the id it was constructed with.
+    '''
+
+    # Emitted with self.id whenever this action is triggered
+    view_historical = pyqtSignal(object)
+
+    def __init__(self, id_, title, parent):
+        QAction.__init__(self, title, parent)
+        self.triggered.connect(self._triggered)
+        self.id = id_
+
+    def _triggered(self):
+        # Turn the generic triggered signal into one that carries the id
+        self.view_historical.emit(self.id)
+
+
 class ViewAction(InterfaceAction):

    name = 'View'
@ -28,18 +40,51 @@ class ViewAction(InterfaceAction):
        self.persistent_files = []
        self.qaction.triggered.connect(self.view_book)
        self.view_menu = QMenu()
-        self.view_menu.addAction(_('View'), partial(self.view_book, False))
-        ac = self.view_menu.addAction(_('View specific format'))
-        ac.setShortcut((Qt.ControlModifier if isosx else Qt.AltModifier)+Qt.Key_V)
+        ac = self.view_specific_action = QAction(_('View specific format'),
+                self.gui)
        self.qaction.setMenu(self.view_menu)
+        ac.setShortcut(Qt.AltModifier+Qt.Key_V)
        ac.triggered.connect(self.view_specific_format, type=Qt.QueuedConnection)
-
-        self.view_menu.addSeparator()
+        ac = self.view_action = QAction(self.qaction.icon(),
+                self.qaction.text(), self.gui)
+        ac.triggered.connect(self.view_book)
        ac = self.create_action(spec=(_('Read a random book'), 'catalog.png',
            None, None), attr='action_pick_random')
        ac.triggered.connect(self.view_random)
-        self.view_menu.addAction(ac)
+        ac = self.clear_history_action = QAction(
+                _('Clear recently viewed list'), self.gui)
+        ac.triggered.connect(self.clear_history)

+    def initialization_complete(self):
+        '''Build the View menu for ``self.gui.current_db``.'''
+        current_db = self.gui.current_db
+        self.build_menus(current_db)
+
+    def build_menus(self, db):
+        '''
+        Rebuild the drop down menu of the View action from scratch.
+
+        The menu gets the view action, the view specific format action and
+        the random book action. If the ``gui_view_history`` preference of
+        ``db`` is non-empty, one :class:`HistoryAction` per stored
+        ``(id, title)`` pair is appended, followed by the action that clears
+        the list.
+
+        :param db: Database whose ``prefs`` hold the view history
+        '''
+        self.view_menu.clear()
+        # Add the dedicated view_action created in genesis(), not
+        # self.qaction: self.qaction owns view_menu (setMenu), so adding it
+        # to that very menu would nest the menu inside itself.
+        self.view_menu.addAction(self.view_action)
+        self.view_menu.addAction(self.view_specific_action)
+        self.view_menu.addSeparator()
+        self.view_menu.addAction(self.action_pick_random)
+        self.history_actions = []
+        history = db.prefs.get('gui_view_history', [])
+        if history:
+            self.view_menu.addSeparator()
+            for id_, title in history:
+                ac = HistoryAction(id_, title, self.view_menu)
+                self.view_menu.addAction(ac)
+                ac.view_historical.connect(self.view_historical)
+            self.view_menu.addSeparator()
+            self.view_menu.addAction(self.clear_history_action)
+
+    def clear_history(self):
+        '''
+        Reset the ``gui_view_history`` preference of the current library to
+        an empty list and rebuild the View menu.
+        '''
+        current_db = self.gui.current_db
+        current_db.prefs['gui_view_history'] = []
+        self.build_menus(current_db)
+
+    def view_historical(self, id_):
+        '''Slot for ``HistoryAction.view_historical``: view the book with id ``id_``.'''
+        book_ids = [id_]
+        self._view_calibre_books(book_ids)
+
+    def library_changed(self, db):
+        '''Rebuild the View menu from the view history stored in ``db``.'''
+        self.build_menus(db=db)

    def location_selected(self, loc):
        enabled = loc == 'library'
@ -47,15 +92,17 @@ class ViewAction(InterfaceAction):
            action.setEnabled(enabled)

    def view_format(self, row, format):
-        fmt_path = self.gui.library_view.model().db.format_abspath(row, format)
-        if fmt_path:
-            self._view_file(fmt_path)
+        id_ = self.gui.library_view.model().id(row)
+        self.view_format_by_id(id_, format)

    def view_format_by_id(self, id_, format):
-        fmt_path = self.gui.library_view.model().db.format_abspath(id_, format,
+        db = self.gui.current_db
+        fmt_path = db.format_abspath(id_, format,
                index_is_id=True)
        if fmt_path:
+            title = db.title(id_, index_is_id=True)
            self._view_file(fmt_path)
+            self.update_history([(id_, title)])

    def book_downloaded_for_viewing(self, job):
        if job.failed:
@ -162,6 +209,54 @@ class ViewAction(InterfaceAction):
        self.gui.iactions['Choose Library'].pick_random()
        self._view_books([self.gui.library_view.currentIndex()])

+    def _view_calibre_books(self, ids):
+        '''
+        View the books with the given ids from ``self.gui.current_db``.
+
+        For each id the format opened is the first entry of
+        ``prefs['input_format_order']`` that the book has, falling back to
+        the first format listed by the database. If looking up the formats
+        raises, an error dialog is shown and the id is removed from the view
+        history. Books without any formats are reported and skipped. All
+        books that were handed to :meth:`view_format_by_id` are recorded in
+        the view history at the end.
+
+        :param ids: Iterable of book ids
+        '''
+        db = self.gui.current_db
+        views = []
+        for id_ in ids:
+            try:
+                formats = db.formats(id_, index_is_id=True)
+            except Exception:
+                # Deliberately not a bare except: KeyboardInterrupt and
+                # SystemExit must propagate instead of being reported as a
+                # missing book.
+                error_dialog(self.gui, _('Cannot view'),
+                    _('This book no longer exists in your library'), show=True)
+                self.update_history([], remove=set([id_]))
+                continue
+
+            title = db.title(id_, index_is_id=True)
+            if not formats:
+                error_dialog(self.gui, _('Cannot view'),
+                    _('%s has no available formats.')%(title,), show=True)
+                continue
+
+            formats = formats.upper().split(',')
+
+            # Default to the first available format, unless one of the
+            # preferred input formats is available
+            fmt = formats[0]
+            for preferred in prefs['input_format_order']:
+                if preferred in formats:
+                    fmt = preferred
+                    break
+            views.append((id_, title))
+            self.view_format_by_id(id_, fmt)
+
+        self.update_history(views)
+
+    def update_history(self, views, remove=frozenset()):
+        '''
+        Update the ``gui_view_history`` preference of the current library.
+
+        :param views: List of ``(id, title)`` pairs to put at the front of
+            the history. Entries are de-duplicated by title, the first
+            occurrence wins, and only the first 10 are kept.
+        :param remove: Set of book ids whose entries are dropped from the
+            stored history.
+
+        The View menu is rebuilt after each of the two steps that runs.
+        '''
+        db = self.gui.current_db
+        if views:
+            titles_seen = set()
+            merged = []
+            for entry in views + db.prefs.get('gui_view_history', []):
+                id_, title = entry
+                if title in titles_seen:
+                    continue
+                titles_seen.add(title)
+                merged.append((id_, title))
+
+            db.prefs['gui_view_history'] = merged[:10]
+            self.build_menus(db)
+        if remove:
+            stored = db.prefs.get('gui_view_history', [])
+            kept = [item for item in stored if item[0] not in remove]
+            db.prefs['gui_view_history'] = kept[:10]
+            self.build_menus(db)
+
    def _view_books(self, rows):
        if not rows or len(rows) == 0:
            self._launch_viewer()
@ -171,28 +266,8 @@ class ViewAction(InterfaceAction):
            return

        if self.gui.current_view() is self.gui.library_view:
-            for row in rows:
-                if hasattr(row, 'row'):
-                    row = row.row()
-
-                formats = self.gui.library_view.model().db.formats(row)
-                title   = self.gui.library_view.model().db.title(row)
-                if not formats:
-                    error_dialog(self.gui, _('Cannot view'),
-                        _('%s has no available formats.')%(title,), show=True)
-                    continue
-
-                formats = formats.upper().split(',')
-
-
-                in_prefs = False
-                for format in prefs['input_format_order']:
-                    if format in formats:
-                        in_prefs = True
-                        self.view_format(row, format)
-                        break
-                if not in_prefs:
-                    self.view_format(row, formats[0])
+            ids = list(map(self.gui.library_view.model().id, rows))
+            self._view_calibre_books(ids)
        else:
            paths = self.gui.current_view().model().paths(rows)
            for path in paths:
--- a/src/calibre/gui2/catalog/catalog_epub_mobi.py
+++ b/src/calibre/gui2/catalog/catalog_epub_mobi.py
@ -193,7 +193,10 @@ class PluginWidget(QWidget,Ui_Form):
        opts_dict['header_note_source_field'] = self.header_note_source_field_name

        # Append the output profile
+        try:
            opts_dict['output_profile'] = [load_defaults('page_setup')['output_profile']]
+        except:
+            opts_dict['output_profile'] = ['default']
        if False:
            print "opts_dict"
            for opt in sorted(opts_dict.keys()):
--- a/src/calibre/gui2/custom_column_widgets.py
+++ b/src/calibre/gui2/custom_column_widgets.py
@ -62,7 +62,7 @@ class Bool(Base):
        w = self.widgets[1]
        items = [_('Yes'), _('No'), _('Undefined')]
        icons = [I('ok.png'), I('list_remove.png'), I('blank.png')]
-        if tweaks['bool_custom_columns_are_tristate'] == 'no':
+        if not self.db.prefs.get('bools_are_tristate'):
            items = items[:-1]
            icons = icons[:-1]
        for icon, text in zip(icons, items):
@ -70,7 +70,7 @@ class Bool(Base):

    def setter(self, val):
        val = {None: 2, False: 1, True: 0}[val]
-        if tweaks['bool_custom_columns_are_tristate'] == 'no' and val == 2:
+        if not self.db.prefs.get('bools_are_tristate') and val == 2:
            val = 1
        self.widgets[1].setCurrentIndex(val)

@ -549,7 +549,7 @@ class BulkBool(BulkBase, Bool):
        value = None
        for book_id in book_ids:
            val = self.db.get_custom(book_id, num=self.col_id, index_is_id=True)
-            if tweaks['bool_custom_columns_are_tristate'] == 'no' and val is None:
+            if not self.db.prefs.get('bools_are_tristate') and val is None:
                val = False
            if value is not None and value != val:
                return None
@ -559,7 +559,7 @@ class BulkBool(BulkBase, Bool):
    def setup_ui(self, parent):
        self.make_widgets(parent, QComboBox)
        items = [_('Yes'), _('No')]
-        if tweaks['bool_custom_columns_are_tristate'] == 'no':
+        if not self.db.prefs.get('bools_are_tristate'):
            items.append('')
        else:
            items.append(_('Undefined'))
@ -571,7 +571,7 @@ class BulkBool(BulkBase, Bool):

    def getter(self):
        val = self.main_widget.currentIndex()
-        if tweaks['bool_custom_columns_are_tristate'] == 'no':
+        if not self.db.prefs.get('bools_are_tristate'):
            return {2: False, 1: False, 0: True}[val]
        else:
            return {2: None, 1: False, 0: True}[val]
@ -586,13 +586,13 @@ class BulkBool(BulkBase, Bool):
            return
        val = self.gui_val
        val = self.normalize_ui_val(val)
-        if tweaks['bool_custom_columns_are_tristate'] == 'no' and val is None:
+        if not self.db.prefs.get('bools_are_tristate') and val is None:
            val = False
        self.db.set_custom_bulk(book_ids, val, num=self.col_id, notify=notify)

    def a_c_checkbox_changed(self):
        if not self.ignore_change_signals:
-            if tweaks['bool_custom_columns_are_tristate'] == 'no' and \
+            if not self.db.prefs.get('bools_are_tristate') and \
                                    self.main_widget.currentIndex() == 2:
                self.a_c_checkbox.setChecked(False)
            else:
--- a/src/calibre/gui2/dialogs/book_info.ui
+++ b/src/calibre/gui2/dialogs/book_info.ui
@ -7,15 +7,25 @@
    <x>0</x>
    <y>0</y>
    <width>917</width>
-    <height>480</height>
+    <height>492</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Dialog</string>
  </property>
+  <property name="windowIcon">
+   <iconset resource="../../../../resources/images.qrc">
+    <normaloff>:/images/metadata.png</normaloff>:/images/metadata.png</iconset>
+  </property>
  <layout class="QGridLayout" name="gridLayout">
   <item row="0" column="0" colspan="2">
    <widget class="QLabel" name="title">
+     <property name="font">
+      <font>
+       <weight>75</weight>
+       <bold>true</bold>
+      </font>
+     </property>
     <property name="text">
      <string>TextLabel</string>
     </property>
@ -24,10 +34,26 @@
     </property>
    </widget>
   </item>
-   <item row="1" column="0">
+   <item row="1" column="0" rowspan="3">
    <widget class="CoverView" name="cover"/>
   </item>
   <item row="1" column="1">
+    <widget class="QScrollArea" name="scrollArea">
+     <property name="frameShape">
+      <enum>QFrame::NoFrame</enum>
+     </property>
+     <property name="widgetResizable">
+      <bool>true</bool>
+     </property>
+     <widget class="QWidget" name="scrollAreaWidgetContents">
+      <property name="geometry">
+       <rect>
+        <x>0</x>
+        <y>0</y>
+        <width>435</width>
+        <height>670</height>
+       </rect>
+      </property>
      <layout class="QVBoxLayout" name="verticalLayout">
       <item>
        <widget class="QLabel" name="text">
@ -72,14 +98,18 @@
         </layout>
        </widget>
       </item>
-     <item>
+      </layout>
+     </widget>
+    </widget>
+   </item>
+   <item row="2" column="1">
    <widget class="QCheckBox" name="fit_cover">
     <property name="text">
      <string>Fit &amp;cover within view</string>
     </property>
    </widget>
   </item>
-     <item>
+   <item row="3" column="1">
    <layout class="QHBoxLayout" name="horizontalLayout">
     <item>
      <widget class="QPushButton" name="previous_button">
@ -106,8 +136,6 @@
    </layout>
   </item>
  </layout>
-   </item>
-  </layout>
 </widget>
 <customwidgets>
  <customwidget>
--- a/src/calibre/gui2/dialogs/job_view.ui
+++ b/src/calibre/gui2/dialogs/job_view.ui
@ -1,3 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
 <ui version="4.0">
 <class>Dialog</class>
 <widget class="QDialog" name="Dialog">
@ -30,13 +31,16 @@
     </property>
    </widget>
   </item>
-   <item row="1" column="0" >
+   <item row="2" column="0">
    <widget class="QDialogButtonBox" name="buttonBox">
     <property name="standardButtons">
      <set>QDialogButtonBox::Ok</set>
     </property>
    </widget>
   </item>
+   <item row="1" column="0">
+    <widget class="QTextBrowser" name="tb"/>
+   </item>
  </layout>
 </widget>
 <resources>
--- a/src/calibre/gui2/dialogs/metadata_bulk.py
+++ b/src/calibre/gui2/dialogs/metadata_bulk.py
@ -519,6 +519,8 @@ class MetadataBulkDialog(ResizableDialog, Ui_MetadataBulkDialog):
                val = [] if fm['is_multiple'] else ['']
            elif not fm['is_multiple']:
                val = [val]
+            elif fm['datatype'] == 'composite':
+                val = [v.strip() for v in val.split(fm['is_multiple'])]
            elif field == 'authors':
                val = [v.replace('|', ',') for v in val]
        else:
--- a/src/calibre/gui2/dialogs/saved_search_editor.ui
+++ b/src/calibre/gui2/dialogs/saved_search_editor.ui
@ -90,7 +90,7 @@
       </property>
       <property name="icon">
        <iconset>
-         <normaloff>:/images/minus.png</normaloff>:/images/minus.png</iconset>
+         <normaloff>:/images/trash.png</normaloff>:/images/trash.png</iconset>
       </property>
      </widget>
     </item>
--- a/src/calibre/gui2/dialogs/tag_categories.ui
+++ b/src/calibre/gui2/dialogs/tag_categories.ui
@ -79,7 +79,7 @@
     </property>
     <property name="icon">
      <iconset>
-       <normaloff>:/images/minus.png</normaloff>:/images/minus.png</iconset>
+       <normaloff>:/images/trash.png</normaloff>:/images/trash.png</iconset>
     </property>
    </widget>
   </item>
--- a/src/calibre/gui2/ebook_download.py
+++ b/src/calibre/gui2/ebook_download.py
@ -0,0 +1,106 @@
+# -*- coding: utf-8 -*-
+
+from __future__ import (unicode_literals, division, absolute_import, print_function)
+
+__license__ = 'GPL 3'
+__copyright__ = '2011, John Schember <john@nachtimwald.com>'
+__docformat__ = 'restructuredtext en'
+
+import os
+import shutil
+from contextlib import closing
+from mechanize import MozillaCookieJar
+
+from calibre import browser, get_download_filename
+from calibre.ebooks import BOOK_EXTENSIONS
+from calibre.gui2 import Dispatcher
+from calibre.gui2.threaded_jobs import ThreadedJob
+from calibre.ptempfile import PersistentTemporaryFile
+
+class EbookDownload(object):
+    '''
+    Callable that downloads an ebook from a URL and then, depending on the
+    arguments, adds it to the library and/or copies it to ``save_loc``.
+
+    The temporary file holding the download is removed when the call
+    finishes, whether it succeeded or failed.
+    '''
+
+    def __call__(self, gui, cookie_file=None, url='', filename='', save_loc='', add_to_lib=True, tags=None, log=None, abort=None, notifications=None):
+        '''
+        Run the download, add and save steps in order.
+
+        :param gui: Object whose ``library_view.model()`` is used to add the book.
+        :param cookie_file: Optional path to a Mozilla format cookie file.
+        :param url: Location to download from. Required.
+        :param filename: Name used as the suffix of the temporary file. When
+                         empty it is obtained from ``get_download_filename``.
+        :param save_loc: If set, the download is copied to this path.
+        :param add_to_lib: If True, the download is added to the library.
+        :param tags: Optional iterable of tags added to the book's metadata.
+        :param log, abort, notifications: Accepted but not used here
+                (presumably supplied by the job runner -- confirm against
+                ThreadedJob).
+
+        Any exception raised by a step propagates to the caller unchanged.
+        '''
+        dfilename = ''
+        try:
+            dfilename = self._download(cookie_file, url, filename, save_loc, add_to_lib)
+            self._add(dfilename, gui, add_to_lib, tags)
+            self._save_as(dfilename, save_loc)
+        finally:
+            # Best effort removal of the temporary download. A failure to
+            # delete it must not hide the real outcome of the job.
+            if dfilename:
+                try:
+                    os.remove(dfilename)
+                except OSError:
+                    pass
+
+    def _download(self, cookie_file, url, filename, save_loc, add_to_lib):
+        '''
+        Download ``url`` into a persistent temporary file.
+
+        :return: Path of the temporary file, or an empty string when neither
+                 ``save_loc`` nor ``add_to_lib`` is set (nothing would be
+                 done with the download).
+        :raises Exception: If ``url`` is empty.
+        '''
+        if not url:
+            raise Exception(_('No file specified to download.'))
+        if not save_loc and not add_to_lib:
+            # Nothing to do.
+            return ''
+
+        if not filename:
+            filename = get_download_filename(url, cookie_file)
+
+        br = browser()
+        if cookie_file:
+            cj = MozillaCookieJar()
+            cj.load(cookie_file)
+            br.set_cookiejar(cj)
+        with closing(br.open(url)) as r:
+            tf = PersistentTemporaryFile(suffix=filename)
+            try:
+                tf.write(r.read())
+                dfilename = tf.name
+            finally:
+                # Close explicitly so the data is flushed to disk before the
+                # file is re-opened by name in _add()/_save_as().
+                tf.close()
+
+        return dfilename
+
+    def _add(self, filename, gui, add_to_lib, tags):
+        '''
+        Add the file at ``filename`` to the library as a new book.
+
+        Does nothing when ``add_to_lib`` is false or ``filename`` is empty.
+
+        :raises Exception: If the file extension is not in BOOK_EXTENSIONS.
+        '''
+        if not add_to_lib or not filename:
+            return
+        ext = os.path.splitext(filename)[1][1:].lower()
+        if ext not in BOOK_EXTENSIONS:
+            raise Exception(_('Not a support ebook format.'))
+
+        from calibre.ebooks.metadata.meta import get_metadata
+        # Ebook files are binary data; text mode would corrupt what is read
+        # on platforms that translate line endings.
+        with open(filename, 'rb') as f:
+            mi = get_metadata(f, ext)
+        mi.tags.extend(tags or [])
+
+        model = gui.library_view.model()
+        db = model.db
+        book_id = db.create_book_entry(mi)
+        db.add_format_with_hooks(book_id, ext.upper(), filename, index_is_id=True)
+        model.books_added(1)
+        model.count_changed()
+
+    def _save_as(self, dfilename, save_loc):
+        '''
+        Copy the downloaded file to ``save_loc``.
+
+        Does nothing when either argument is empty.
+        '''
+        if not save_loc or not dfilename:
+            return
+        shutil.copy(dfilename, save_loc)
+
+
+# Module-level EbookDownload instance; start_ebook_download() passes it to
+# ThreadedJob as the callable to run.
+gui_ebook_download = EbookDownload()
+
+def start_ebook_download(callback, job_manager, gui, cookie_file=None, url='', filename='', save_loc='', add_to_lib=True, tags=None):
+    '''
+    Create a ThreadedJob that runs gui_ebook_download and hand it to
+    ``job_manager``.
+
+    :param callback: Passed to ThreadedJob as the job's callback.
+    :param job_manager: Object providing ``run_threaded_job``.
+    :param tags: Optional iterable of tags; ``None`` means no tags.
+
+    The remaining arguments are forwarded unchanged to gui_ebook_download.
+    '''
+    if tags is None:
+        tags = []
+    # Parenthesised on purpose: without the parentheses the conditional
+    # applies to the whole expression and the description is the bare URL
+    # (no "Downloading" prefix) whenever no filename is given.
+    description = _('Downloading %s') % (filename if filename else url)
+    job = ThreadedJob('ebook_download', description, gui_ebook_download,
+            (gui, cookie_file, url, filename, save_loc, add_to_lib, tags),
+            {}, callback, max_concurrent_count=2, killable=False)
+    job_manager.run_threaded_job(job)
+
+
+class EbookDownloadMixin(object):
+    '''
+    Mixin that adds ebook downloading to a class.
+
+    The methods below use ``self.job_manager``, ``self.status_bar`` and
+    ``self.job_exception``, which the host class must provide.
+    '''
+
+    def download_ebook(self, url='', cookie_file=None, filename='', save_loc='', add_to_lib=True, tags=None):
+        '''
+        Start a background download of ``url`` and show a status bar message.
+
+        :param tags: Either an iterable of tags or a single comma separated
+                     string of tags. ``None`` or empty means no tags.
+
+        The other arguments are forwarded to start_ebook_download().
+        '''
+        if not tags:
+            tags = []
+        elif isinstance(tags, basestring):
+            # Drop the whitespace around each tag and skip empty entries,
+            # so that 'a, b,' gives ['a', 'b'] rather than ['a', ' b', ''].
+            tags = [t.strip() for t in tags.split(',') if t.strip()]
+        start_ebook_download(Dispatcher(self.downloaded_ebook), self.job_manager, self, cookie_file, url, filename, save_loc, add_to_lib, tags)
+        # Parenthesised on purpose: without the parentheses the message is
+        # the bare URL (no "Downloading" prefix) when no filename is given.
+        self.status_bar.show_message(_('Downloading') + ' ' + (filename if filename else url), 3000)
+
+    def downloaded_ebook(self, job):
+        '''
+        Callback for a finished download job: report a failure through
+        ``self.job_exception``, otherwise show a status bar message.
+        '''
+        if job.failed:
+            self.job_exception(job, dialog_title=_('Failed to download ebook'))
+            return
+
+        self.status_bar.show_message(job.description + ' ' + _('finished'), 5000)
--- a/Show More
+++ b/Show More