Sync to trunk.

2025-12-11 15:45:03 -05:00 · 2011-04-06 18:54:01 -04:00 · 2011-04-06 18:54:01 -04:00 · 225c7447cb
commit 225c7447cb
parent 739609210e 58899e65ef
44 changed files with 1310 additions and 285 deletions
--- a/8
+++ b/8
@ -1,6 +1,9 @@
 calibre supports installation from source, only on Linux. 
-On Windows and OS X use the provided installers and use
+
-the facilities of the calibre-debug command to hack on the calibre source. 
+Note that you *do not* need to install from source to hack on
 the calibre source code. To get started with calibre development,
 use a normal calibre install and follow the instructions at
 http://calibre-ebook.com/user_manual/develop.html
 On Linux, there are two kinds of installation from source possible.
 Note that both kinds require lots of dependencies as well as a
@ -45,3 +48,4 @@ This type of install can be run with the command::
    sudo python setup.py develop
 Use the -h flag for help on the develop command.
--- a/2
+++ b/2
@ -7,7 +7,7 @@ reading. It is cross platform, running on Linux, Windows and OS X.
 For screenshots: https://calibre-ebook.com/demo
 For installation/usage instructions please see
-http://calibre-ebook.com
+http://calibre-ebook.com/user_manual
 For source code access:
 bzr branch lp:calibre
--- a/recipes/developpez.recipe
+++ b/recipes/developpez.recipe
@ -0,0 +1,21 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1301849956(BasicNewsRecipe):
    # Recipe for the French developer site Developpez.com.
    # -- identification --
    __author__ = 'louhike'
    title = u'Developpez.com'
    publisher = u'Developpez.com'
    description = u'Toutes les news du site Developpez.com'
    language = 'fr'
    timefmt = ' [%a, %d %b, %Y]'
    # -- download limits --
    oldest_article = 7
    max_articles_per_feed = 100
    # -- page clean-up --
    encoding = 'ISO-8859-1'
    no_stylesheets = True
    remove_javascript = True
    keep_only_tags = [{'name': 'div', 'attrs': {'class': 'content'}}]
    # -- sources --
    feeds = [
        (u'Tous les articles', u'http://www.developpez.com/index/rss'),
    ]
    def get_cover_url(self):
        # The site logo is used as the cover image.
        return 'http://javascript.developpez.com/template/images/logo.gif'
--- a/recipes/f_secure.recipe
+++ b/recipes/f_secure.recipe
@ -0,0 +1,22 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 class AdvancedUserRecipe1301860159(BasicNewsRecipe):
    # Recipe for the F-Secure company weblog.
    title          = u'F-Secure Weblog'
    # NOTE: language used to be assigned twice in this class body; the second
    # assignment ('en_EN', not a valid language code) silently overrode this
    # one. Only the valid code is kept.
    language = 'en'
    __author__ = 'louhike'
    description = u'All the news from the weblog of F-Secure'
    publisher = u'F-Secure'
    timefmt = ' [%a, %d %b, %Y]'
    encoding = 'ISO-8859-1'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content   = False
    remove_javascript = True
    # Keep only the div holding the post and drop links/rules inside it.
    keep_only_tags = [dict(name='div', attrs={'class':'modSectionTd2'})]
    remove_tags = [dict(name='a'),dict(name='hr')]
    feeds          = [(u'Weblog', u'http://www.f-secure.com/weblog/weblog.rss')]
    def get_cover_url(self):
        # The company logo is used as the cover image.
        return 'http://www.f-secure.com/weblog/archives/images/company_logo.png'
--- a/recipes/kommersant.recipe
+++ b/recipes/kommersant.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.kommersant.ru
 '''
@ -20,7 +20,13 @@ class Kommersant_ru(BasicNewsRecipe):
    language              = 'ru'
    publication_type      = 'newspaper'
    masthead_url          = 'http://www.kommersant.ru/CorpPics/logo_daily_1.gif'
-    extra_css             = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Arial, sans1, sans-serif} span#ctl00_ContentPlaceHolderStyle_LabelSubTitle{margin-bottom: 1em; display: block} .author{margin-bottom: 1em; display: block} .paragraph{margin-bottom: 1em; display: block} .vvodka{font-weight: bold; margin-bottom: 1em} '
+    extra_css             = """ 
 	                          @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
 	                          body{font-family: Tahoma, Arial, Helvetica, sans1, sans-serif}
 							  .title{font-size: x-large; font-weight: bold; margin-bottom: 1em}
 							  .subtitle{font-size: large; margin-bottom: 1em}
 							  .document_vvodka{font-weight: bold; margin-bottom: 1em}
 							"""
    conversion_options = {
                          'comment'          : description
@ -29,14 +35,11 @@ class Kommersant_ru(BasicNewsRecipe):
                        , 'language'         : language
                        }
-    keep_only_tags = [
+    keep_only_tags = [dict(attrs={'class':['document','document_vvodka','document_text','document_authors vblock']})]
-                         dict(attrs={'id':'ctl00_ContentPlaceHolderStyle_PanelHeader'})
+    remove_tags    = [dict(name=['iframe','object','link','img','base','meta'])]
                        ,dict(attrs={'class':['vvodka','paragraph','author']})
                     ]
    remove_tags        = [dict(name=['iframe','object','link','img','base'])]
    feeds       = [(u'Articles', u'http://feeds.kommersant.ru/RSS_Export/RU/daily.xml')]
    def print_version(self, url):	    
-        return url.replace('doc-rss.aspx','doc.aspx') + '&print=true'
+        return url.replace('/doc-rss/','/Doc/') + '/Print'
--- a/recipes/perfil.recipe
+++ b/recipes/perfil.recipe
@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 perfil.com
 '''
@ -39,9 +39,9 @@ class Perfil(BasicNewsRecipe):
                      dict(name=['iframe','embed','object','base','meta','link'])
                     ,dict(name='a', attrs={'href':'#comentarios'})
                     ,dict(name='div', attrs={'class':'foto3'})
-                     ,dict(name='img', attrs={'alt':'ampliar'})
+                     ,dict(name='img', attrs={'alt':['ampliar','Ampliar']})
                    ]
-    keep_only_tags=[dict(attrs={'class':['bd468a','cuerpoSuperior']})]
+    keep_only_tags=[dict(attrs={'class':['articulo','cuerpoSuperior']})]
    remove_attributes=['onload','lang','width','height','border']
    feeds = [
--- a/recipes/toi.recipe
+++ b/recipes/toi.recipe
@ -1,3 +1,4 @@
 import re
 from calibre.web.feeds.news import BasicNewsRecipe
 class TimesOfIndia(BasicNewsRecipe):
@ -8,10 +9,10 @@ class TimesOfIndia(BasicNewsRecipe):
    max_articles_per_feed = 25
    no_stylesheets = True
-    keep_only_tags = [dict(attrs={'class':'maintable12'})]
+    keep_only_tags = [{'class':['maintable12', 'prttabl']}]
    remove_tags = [
            dict(style=lambda x: x and 'float' in x),
-            dict(attrs={'class':'prvnxtbg'}),
+            {'class':['prvnxtbg', 'footbdrin', 'bcclftr']},
    ]
    feeds          = [
@ -38,8 +39,28 @@ class TimesOfIndia(BasicNewsRecipe):
 ('Most Read',
 'http://timesofindia.indiatimes.com/rssfeedmostread.cms')
 ]
-    def print_version(self, url):
+
-        return url + '?prtpage=1'
+    def get_article_url(self, article):
        url = BasicNewsRecipe.get_article_url(self, article)
        if '/0Ltimesofindia' in url:
            url = url.partition('/0L')[-1]
            url = url.replace('0B', '.').replace('0N', '.com').replace('0C',
                    '/').replace('0E', '-')
            url = 'http://' + url.rpartition('/')[0]
            match = re.search(r'/([0-9a-zA-Z]+?)\.cms', url)
            if match is not None:
                num = match.group(1)
                num = re.sub(r'[^0-9]', '', num)
                return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
                    num)
        else:
            cms = re.search(r'/(\d+)\.cms', url)
            if cms is not None:
                return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
                    cms.group(1))
        return url
    def preprocess_html(self, soup):
        return soup
--- a/src/calibre/init.py
+++ b/src/calibre/init.py
@ -217,8 +217,19 @@ def filename_to_utf8(name):
    return name.decode(codec, 'replace').encode('utf8')
 def extract(path, dir):
    ext = os.path.splitext(path)[1][1:].lower()
    extractor = None
    # First use the file header to identify its type
    with open(path, 'rb') as f:
        id_ = f.read(3)
    if id_ == b'Rar':
        from calibre.libunrar import extract as rarextract
        extractor = rarextract
    elif id_.startswith(b'PK'):
        from calibre.libunzip import extract as zipextract
        extractor = zipextract
    if extractor is None:
        # Fallback to file extension
        ext = os.path.splitext(path)[1][1:].lower()
        if ext in ['zip', 'cbz', 'epub', 'oebzip']:
            from calibre.libunzip import extract as zipextract
            extractor = zipextract
@ -281,16 +292,17 @@ def get_parsed_proxy(typ='http', debug=True):
 def random_user_agent():
    choices = [
-        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)',
+        'Mozilla/5.0 (Windows NT 5.2; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19',
        'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en; rv:1.8.1.14) Gecko/20080409 Camino/1.6 (like Firefox/2.0.0.14)',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.0.1) Gecko/20060118 Camino/1.0b2+',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.78 Safari/532.5',
        'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
    ]
    #return choices[-1]
    return choices[random.randint(0, len(choices)-1)]
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@ -10,6 +10,7 @@ from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.oeb.base import OEB_IMAGES
 from calibre.utils.config import test_eight_code
 # To archive plugins {{{
 class HTML2ZIP(FileTypePlugin):
@ -166,6 +167,14 @@ class ComicMetadataReader(MetadataReaderPlugin):
    description = _('Extract cover from comic files')
    def get_metadata(self, stream, ftype):
        # Sniff the real container type from the first three bytes when the
        # stream is seekable, overriding the caller-supplied ftype. The read
        # position is restored afterwards.
        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
            pos = stream.tell()
            id_ = stream.read(3)
            stream.seek(pos)
            if id_ == b'Rar':
                ftype = 'cbr'
            # Fixed: this used to call startswith() on the builtin ``id``
            # instead of the bytes just read, raising AttributeError for
            # every seekable stream that is not a RAR archive.
            elif id_.startswith(b'PK'):
                ftype = 'cbz'
        if ftype == 'cbr':
            from calibre.libunrar import extract_first_alphabetically as extract_first
            extract_first
@ -604,20 +613,34 @@ from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK
 from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
    KentDistrictLibrary
 from calibre.ebooks.metadata.douban import DoubanBooks
 from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
 from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
        AmazonCovers, DoubanCovers
 from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
 from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck
-plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
+plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
-        KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
+        Epubcheck, ]
-        Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
+
-        NiceBooksCovers]
+if test_eight_code:
 # New metadata download plugins {{{
    from calibre.ebooks.metadata.sources.google import GoogleBooks
    from calibre.ebooks.metadata.sources.amazon import Amazon
    from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
    plugins += [GoogleBooks, Amazon, OpenLibrary]
 # }}}
 else:
    from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
        KentDistrictLibrary
    from calibre.ebooks.metadata.douban import DoubanBooks
    from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
    from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
            AmazonCovers, DoubanCovers
    plugins += [GoogleBooks, ISBNDB, Amazon,
        OpenLibraryCovers, AmazonCovers, DoubanCovers,
        NiceBooksCovers, KentDistrictLibrary, DoubanBooks, NiceBooks]
 plugins += [
    ComicInput,
    EPUBInput,
@ -1055,11 +1078,4 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
 #}}}
 # New metadata download plugins {{{
 from calibre.ebooks.metadata.sources.google import GoogleBooks
 from calibre.ebooks.metadata.sources.amazon import Amazon
 from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
 plugins += [GoogleBooks, Amazon, OpenLibrary]
 # }}}
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@ -36,7 +36,9 @@ class ANDROID(USBMS):
            # Motorola
            0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
                       0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
-                       0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216] },
+                       0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216],
                       0x7086 : [0x0226],
                     },
            # Sony Ericsson
            0xfce : { 0xd12e : [0x0100]},
@ -101,7 +103,8 @@ class ANDROID(USBMS):
            'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
            'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
            'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
-            '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2']
+            '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
            'MB860']
    WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
            'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
            'A70S', 'A101IT', '7']
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@ -244,7 +244,8 @@ class POCKETBOOK602(USBMS):
    BCD         = [0x0324]
    VENDOR_NAME = ''
-    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902', 'PB903']
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902',
            'PB903', 'PB']
 class POCKETBOOK701(USBMS):
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@ -125,7 +125,10 @@ class Metadata(object):
        _data = object.__getattribute__(self, '_data')
        if field in TOP_LEVEL_IDENTIFIERS:
            field, val = self._clean_identifier(field, val)
-            _data['identifiers'].update({field: val})
+            identifiers = _data['identifiers']
            identifiers.pop(field, None)
            if val:
                identifiers[field] = val
        elif field == 'identifiers':
            if not val:
                val = copy.copy(NULL_VALUES.get('identifiers', None))
@ -224,8 +227,7 @@ class Metadata(object):
        identifiers = object.__getattribute__(self,
            '_data')['identifiers']
-        if not val and typ in identifiers:
+        identifiers.pop(typ, None)
            identifiers.pop(typ)
        if val:
            identifiers[typ] = val
@ -647,7 +649,7 @@ class Metadata(object):
            fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
        if self.series:
            fmt('Series', self.series + ' #%s'%self.format_series_index())
-        if self.language:
+        if not self.is_null('language'):
            fmt('Language', self.language)
        if self.rating is not None:
            fmt('Rating', self.rating)
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@ -193,6 +193,7 @@ class ResultList(list):
 def search(title=None, author=None, publisher=None, isbn=None,
           min_viewability='none', verbose=False, max_results=40):
    br   = browser()
    br.set_handle_gzip(True)
    start, entries = 1, []
    while start > 0 and len(entries) <= max_results:
        new, start = Query(title=title, author=author, publisher=publisher,
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@ -23,7 +23,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.library.comments import sanitize_comments_html
 from calibre.utils.date import parse_date
-class Worker(Thread): # {{{
+class Worker(Thread): # Get details {{{
    '''
    Get book details from amazons book page in a separate thread
@ -64,7 +64,7 @@ class Worker(Thread): # {{{
        raw = xml_to_unicode(raw, strip_encoding_pats=True,
                resolve_entities=True)[0]
-        # open('/t/t.html', 'wb').write(raw)
+        #open('/t/t.html', 'wb').write(raw)
        if '<title>404 - ' in raw:
            self.log.error('URL malformed: %r'%self.url)
@ -218,6 +218,9 @@ class Worker(Thread): # {{{
                    ' @class="emptyClear" or @href]'):
                c.getparent().remove(c)
            desc = tostring(desc, method='html', encoding=unicode).strip()
            # Encoding bug in Amazon data U+fffd (replacement char)
            # in some examples it is present in place of '
            desc = desc.replace('\ufffd', "'")
            # remove all attributes from tags
            desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
            # Collapse whitespace
@ -276,12 +279,14 @@ class Worker(Thread): # {{{
 class Amazon(Source):
-    name = 'Amazon'
+    name = 'Amazon Metadata'
    description = _('Downloads metadata from Amazon')
    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
        'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
    has_html_comments = True
    supports_gzip_transfer_encoding = True
    AMAZON_DOMAINS = {
            'com': _('US'),
@ -408,6 +413,18 @@ class Amazon(Source):
                    if 'bulk pack' not in title:
                        matches.append(a.get('href'))
                    break
            if not matches:
                # This can happen for some user agents that Amazon thinks are
                # mobile/less capable
                log('Trying alternate results page markup')
                for td in root.xpath(
                    r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
                    for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
                        title = tostring(a, method='text', encoding=unicode).lower()
                        if 'bulk pack' not in title:
                            matches.append(a.get('href'))
                        break
        # Keep only the top 5 matches as the matches are sorted by relevance by
        # Amazon so lower matches are not likely to be very relevant
@ -476,9 +493,10 @@ class Amazon(Source):
        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
-            result_queue.put(cdata)
+            result_queue.put((self, cdata))
        except:
            log.exception('Failed to download cover from:', cached_url)
    # }}}
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@ -15,9 +15,20 @@ from calibre.customize import Plugin
 from calibre.utils.logging import ThreadSafeLog, FileStream
 from calibre.utils.config import JSONConfig
 from calibre.utils.titlecase import titlecase
 from calibre.utils.icu import capitalize, lower
 from calibre.ebooks.metadata import check_isbn
-msprefs = JSONConfig('metadata_sources.json')
+msprefs = JSONConfig('metadata_sources/global.json')
 msprefs.defaults['txt_comments'] = False
 msprefs.defaults['ignore_fields'] = []
 msprefs.defaults['max_tags'] = 20
 msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
 msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds
 # Google covers are often poor quality (scans/errors) but they have high
 # resolution, so they trump covers from better sources. So make sure they
 # are only used if no other covers are found.
 msprefs.defaults['cover_priorities'] = {'Google':2}
 def create_log(ostream=None):
    log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
@ -89,6 +100,39 @@ class InternalMetadataCompareKeyGen(object):
 # }}}
 def get_cached_cover_urls(mi):
    '''
    Yield a (plugin, url) pair for every metadata plugin with the
    'identify' capability that returns a truthy cached cover URL for
    ``mi.identifiers``.
    '''
    from calibre.customize.ui import metadata_plugins
    # The plugin list is materialised up front, before the first yield.
    for plugin in list(metadata_plugins(['identify'])):
        cached = plugin.get_cached_cover_url(mi.identifiers)
        if cached:
            yield (plugin, cached)
 def cap_author_token(token):
    '''
    Capitalize a single whitespace-free token of an author name.

    The particles von/de/el/van/le are returned lower-cased. Runs of
    dotted single letters (e.g. J.K.) are split into spaced initials
    (J. K.). Every other token is passed through capitalize().
    '''
    lowered = lower(token)
    if lowered in ('von', 'de', 'el', 'van', 'le'):
        return lowered
    looks_like_initials = re.match(r'([a-z]\.){2,}$', lowered) is not None
    if not looks_like_initials:
        return capitalize(token)
    # Normalize tokens of the form J.K. to J. K.
    pieces = [capitalize(piece) for piece in token.split('.')]
    return '. '.join(pieces).strip()
 def fixauthors(authors):
    '''
    Return a new list with every author name re-capitalized token by token
    via cap_author_token(). A falsy ``authors`` value is returned unchanged.
    '''
    if not authors:
        return authors
    return [' '.join(map(cap_author_token, name.split())) for name in authors]
 def fixcase(x):
    '''
    Return titlecase(x) for a truthy ``x``; falsy values (None, empty
    string) are returned unchanged.
    '''
    return titlecase(x) if x else x
 class Source(Plugin):
    type = _('Metadata source')
@ -104,6 +148,15 @@ class Source(Plugin):
    #: during the identify phase
    touched_fields = frozenset()
    #: Set this to True if your plugin return HTML formatted comments
    has_html_comments = False
    #: Setting this to True means that the browser object will add
    #: Accept-Encoding: gzip to all requests. This can speedup downloads
    #: but make sure that the source actually supports gzip transfer encoding
    #: correctly first
    supports_gzip_transfer_encoding = False
    def __init__(self, *args, **kwargs):
        Plugin.__init__(self, *args, **kwargs)
        self._isbn_to_identifier_cache = {}
@ -127,6 +180,8 @@ class Source(Plugin):
    def browser(self):
        # Lazily create the shared browser on first access. Note that the
        # call below resolves to the module-level browser() factory, not to
        # this method.
        if self._browser is None:
            self._browser = browser(user_agent=random_user_agent())
            # Only enable gzip handling for sources that declare support
            # through the supports_gzip_transfer_encoding class attribute.
            if self.supports_gzip_transfer_encoding:
                self._browser.set_handle_gzip(True)
        # Callers receive the result of clone_browser() on the cached
        # instance rather than the cached instance itself.
        return self._browser.clone_browser()
    # }}}
@ -229,13 +284,9 @@ class Source(Plugin):
        before putting the Metadata object into result_queue. You can of
        course, use a custom algorithm suited to your metadata source.
        '''
        def fixcase(x):
            if x:
                x = titlecase(x)
            return x
        if mi.title:
            mi.title = fixcase(mi.title)
-        mi.authors = list(map(fixcase, mi.authors))
+        mi.authors = fixauthors(mi.authors)
        mi.tags = list(map(fixcase, mi.tags))
        mi.isbn = check_isbn(mi.isbn)
@ -316,7 +367,8 @@ class Source(Plugin):
            title=None, authors=None, identifiers={}, timeout=30):
        '''
        Download a cover and put it into result_queue. The parameters all have
-        the same meaning as for :meth:`identify`.
+        the same meaning as for :meth:`identify`. Put (self, cover_data) into
        result_queue.
        This method should use cached cover URLs for efficiency whenever
        possible. When cached data is not present, most plugins simply call
--- a/src/calibre/ebooks/metadata/sources/cli.py
+++ b/src/calibre/ebooks/metadata/sources/cli.py
@ -0,0 +1,105 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import sys, textwrap
 from io import BytesIO
 from threading import Event
 from calibre import prints
 from calibre.utils.config import OptionParser
 from calibre.utils.magick.draw import save_cover_data_to
 from calibre.ebooks.metadata import string_to_authors
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.metadata.sources.base import create_log
 from calibre.ebooks.metadata.sources.identify import identify
 from calibre.ebooks.metadata.sources.covers import download_cover
 from calibre.utils.config import test_eight_code
 def option_parser():
    '''
    Build the command line option parser for the metadata download tool.

    When ``test_eight_code`` is false the parser of the legacy
    calibre.ebooks.metadata.fetch module is returned instead.
    '''
    if not test_eight_code:
        from calibre.ebooks.metadata.fetch import option_parser as legacy_option_parser
        return legacy_option_parser()
    usage = textwrap.dedent(
        '''\
        %prog [options]
        Fetch book metadata from online sources. You must specify at least one
        of title, authors or ISBN.
        '''
    )
    parser = OptionParser(usage)
    add = parser.add_option
    # What to search for
    add('-t', '--title', help='Book title')
    add('-a', '--authors', help='Book author(s)')
    add('-i', '--isbn', help='Book ISBN')
    # Output control
    add('-v', '--verbose', default=False, action='store_true',
        help='Print the log to the console (stderr)')
    add('-o', '--opf', help='Output the metadata in OPF format')
    add('-c', '--cover',
        help='Specify a filename. The cover, if available, will be saved to it')
    # Kept as a string default; main() converts it with int()
    add('-d', '--timeout', default='30',
        help='Timeout in seconds. Default is 30')
    return parser
 def main(args=sys.argv):
    '''
    Command line entry point: identify a book from the title/authors/ISBN
    options, optionally download its cover, and print the resulting metadata
    (as OPF when --opf is given).

    When ``test_eight_code`` is false, the legacy
    calibre.ebooks.metadata.fetch.main is run instead.

    Returns 0 on success. Raises SystemExit(1) when no results are found.
    '''
    if not test_eight_code:
        from calibre.ebooks.metadata.fetch import main
        return main(args)
    parser = option_parser()
    opts, args = parser.parse_args(args)
    # The log is collected in memory so it can be shown on request or on
    # failure.
    buf = BytesIO()
    log = create_log(buf)
    abort = Event()
    authors = []
    if opts.authors:
        authors = string_to_authors(opts.authors)
    identifiers = {}
    if opts.isbn:
        identifiers['isbn'] = opts.isbn
    timeout = int(opts.timeout)
    results = identify(log, abort, title=opts.title, authors=authors,
            identifiers=identifiers, timeout=timeout)
    if not results:
        # Fixed: print the buffered log text; this used to print the logger
        # object itself, which shows nothing useful.
        print (buf.getvalue(), file=sys.stderr)
        prints('No results found', file=sys.stderr)
        raise SystemExit(1)
    result = results[0]
    cf = None
    # results is known to be non-empty here, so only the option is checked.
    if opts.cover:
        cover = download_cover(log, title=opts.title, authors=authors,
                identifiers=result.identifiers, timeout=timeout)
        if cover is None:
            prints('No cover found', file=sys.stderr)
        else:
            # The image data is the last element of the cover tuple
            save_cover_data_to(cover[-1], opts.cover)
            result.cover = cf = opts.cover
    log_text = buf.getvalue()
    result = (metadata_to_opf(result) if opts.opf else
                    unicode(result).encode('utf-8'))
    if opts.verbose:
        print (log_text, file=sys.stderr)
    print (result)
    if not opts.opf and opts.cover:
        prints('Cover               :', cf)
    return 0
 if __name__ == '__main__':
    sys.exit(main())
--- a/src/calibre/ebooks/metadata/sources/covers.py
+++ b/src/calibre/ebooks/metadata/sources/covers.py
@ -0,0 +1,178 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import time
 from Queue import Queue, Empty
 from threading import Thread, Event
 from io import BytesIO
 from calibre.customize.ui import metadata_plugins
 from calibre.ebooks.metadata.sources.base import msprefs, create_log
 from calibre.utils.magick.draw import Image, save_cover_data_to
 class Worker(Thread):
    '''
    Daemon thread that runs the download_cover() method of a single plugin.

    Log output goes to the in-memory buffer ``self.buf``. ``time_spent`` is
    None until run() has finished.
    '''
    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
        Thread.__init__(self)
        self.daemon = True
        self.plugin, self.abort = plugin, abort
        self.title = title
        self.authors = authors
        self.identifiers = identifiers
        self.timeout = timeout
        self.rq = rq
        self.buf = BytesIO()
        self.log = create_log(self.buf)
        self.time_spent = None
    def run(self):
        began = time.time()
        already_aborted = self.abort.is_set()
        if not already_aborted:
            try:
                self.plugin.download_cover(self.log, self.rq, self.abort,
                    title=self.title, authors=self.authors,
                    identifiers=self.identifiers, timeout=self.timeout)
            except:
                # Any plugin failure is logged and otherwise ignored
                self.log.exception('Failed to download cover from',
                        self.plugin.name)
        self.time_spent = time.time() - began
 def is_worker_alive(workers):
    '''
    Return True if at least one of ``workers`` reports is_alive(), False
    otherwise (including for an empty sequence).
    '''
    return any(w.is_alive() for w in workers)
 def process_result(log, result):
    '''
    Validate and normalize one raw cover result.

    ``result`` is a (plugin, image_data) pair. The image is loaded, trimmed
    and re-encoded via save_cover_data_to(). Returns the tuple
    (plugin, width, height, fmt, data), or None (after logging) when the
    image cannot be processed or is smaller than 50 pixels in either
    dimension.
    '''
    plugin, raw = result
    try:
        img = Image()
        img.load(raw)
        img.trim(10)
        width, height = img.size
        fmt = img.format
        too_small = width < 50 or height < 50
        if too_small:
            raise ValueError('Image too small')
        data = save_cover_data_to(img, '/cover.jpg', return_data=True)
    except:
        log.exception('Invalid cover from', plugin.name)
        return None
    else:
        return (plugin, width, height, fmt, data)
 def run_download(log, results, abort,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Run the cover download, putting results into the queue :param:`results`.
    Each result is a tuple of the form:
        (plugin, width, height, fmt, bytes)

    One Worker thread is started per metadata plugin with the 'cover'
    capability. The function returns when all workers have finished, when
    :param:`abort` is set, or once the time configured in
    msprefs['wait_after_first_cover_result'] has passed since the first valid
    cover arrived (in which case :param:`abort` is set by this function).
    A per-plugin summary is written to :param:`log` before returning.

    :param:`identifiers` is only passed through to the workers; it is not
    modified here.
    '''
    plugins = list(metadata_plugins(['cover']))
    rq = Queue()
    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
            in plugins]
    for w in workers:
        w.start()
    first_result_at = None
    wait_time = msprefs['wait_after_first_cover_result']
    found_results = {}
    def publish_next():
        # Take one raw cover off rq and publish it if it is valid. Raises
        # Empty when rq has nothing to offer; returns True only when a valid
        # cover was put into results.
        result = process_result(log, rq.get_nowait())
        if result is None:
            return False
        results.put(result)
        found_results[result[0]] = result
        return True
    while True:
        time.sleep(0.1)
        try:
            # Fixed: the timestamp is recorded when the FIRST valid cover
            # arrives. The previous test was inverted ("is not None"), so it
            # was never set and the wait_time cutoff below could never fire.
            if publish_next() and first_result_at is None:
                first_result_at = time.time()
        except Empty:
            pass
        if not is_worker_alive(workers):
            break
        if first_result_at is not None and time.time() - first_result_at > wait_time:
            log('Not waiting for any more results')
            abort.set()
        if abort.is_set():
            break
    # Pick up whatever is still sitting in the queue after the loop ended
    while True:
        try:
            publish_next()
        except Empty:
            break
    for w in workers:
        wlog = w.buf.getvalue().strip()
        log('\n'+'*'*30, w.plugin.name, 'Covers', '*'*30)
        log('Request extra headers:', w.plugin.browser.addheaders)
        if w.plugin in found_results:
            result = found_results[w.plugin]
            log('Downloaded cover:', '%dx%d'%(result[1], result[2]))
        else:
            log('Failed to download valid cover')
        # time_spent stays None while the worker's run() has not finished
        if w.time_spent is None:
            log('Download aborted')
        else:
            log('Took', w.time_spent, 'seconds')
        if wlog:
            log(wlog)
        log('\n'+'*'*80)
 def download_cover(log,
        title=None, authors=None, identifiers={}, timeout=30):
    '''
    Download covers synchronously and return the preferred one.

    Covers are ranked first by the per-plugin value in
    msprefs['cover_priorities'] (lower wins, plugins without an entry count
    as 1) and then by pixel area (larger wins).

    Returns a tuple (plugin, width, height, fmt, data), or None if no cover
    was found.
    '''
    found = Queue()
    run_download(log, found, Event(), title=title, authors=authors,
            identifiers=identifiers, timeout=timeout)
    covers = []
    try:
        while True:
            covers.append(found.get_nowait())
    except Empty:
        pass
    priorities = msprefs['cover_priorities']
    def rank(cover):
        plugin, width, height, _fmt, _data = cover
        return (priorities.get(plugin.name, 1), 1/(width*height))
    if not covers:
        return None
    return sorted(covers, key=rank)[0]
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@ -145,21 +145,25 @@ def to_metadata(browser, log, entry_, timeout): # {{{
            log.exception('Failed to parse rating')
    # Cover
-    mi.has_google_cover = len(extra.xpath(
+    mi.has_google_cover = None
-        '//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0
+    for x in extra.xpath(
            '//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'):
        mi.has_google_cover = x.get('href')
        break
    return mi
 # }}}
 class GoogleBooks(Source):
-    name = 'Google Books'
+    name = 'Google'
    description = _('Downloads metadata from Google Books')
    capabilities = frozenset(['identify', 'cover'])
    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
        'comments', 'publisher', 'identifier:isbn', 'rating',
        'identifier:google']) # language currently disabled
    supports_gzip_transfer_encoding = True
    GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
@ -212,7 +216,7 @@ class GoogleBooks(Source):
            results.sort(key=self.identify_results_keygen(
                title=title, authors=authors, identifiers=identifiers))
            for mi in results:
-                cached_url = self.cover_url_from_identifiers(mi.identifiers)
+                cached_url = self.get_cached_cover_url(mi.identifiers)
                if cached_url is not None:
                    break
        if cached_url is None:
@ -222,9 +226,10 @@ class GoogleBooks(Source):
        if abort.is_set():
            return
        br = self.browser
        log('Downloading cover from:', cached_url)
        try:
            cdata = br.open_novisit(cached_url, timeout=timeout).read()
-            result_queue.put(cdata)
+            result_queue.put((self, cdata))
        except:
            log.exception('Failed to download cover from:', cached_url)
@ -270,6 +275,9 @@ class GoogleBooks(Source):
            identifiers={}, timeout=30):
        query = self.create_query(log, title=title, authors=authors,
                identifiers=identifiers)
        if not query:
            log.error('Insufficient metadata to construct query')
            return
        br = self.browser
        try:
            raw = br.open_novisit(query, timeout=timeout).read()
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@ -8,17 +8,21 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 import time
 from datetime import datetime
 from Queue import Queue, Empty
 from threading import Thread
 from io import BytesIO
 from operator import attrgetter
 from calibre.customize.ui import metadata_plugins
-from calibre.ebooks.metadata.sources.base import create_log
+from calibre.ebooks.metadata.sources.base import create_log, msprefs
 from calibre.ebooks.metadata.xisbn import xisbn
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.date import utc_tz
 from calibre.utils.html2text import html2text
 from calibre.utils.icu import lower
-# How long to wait for more results after first result is found
+# Download worker {{{
 WAIT_AFTER_FIRST_RESULT = 30 # seconds
 class Worker(Thread):
    def __init__(self, plugin, kwargs, abort):
@ -31,10 +35,12 @@ class Worker(Thread):
        self.log = create_log(self.buf)
    def run(self):
        start = time.time()
        try:
            self.plugin.identify(self.log, self.rq, self.abort, **self.kwargs)
        except:
            self.log.exception('Plugin', self.plugin.name, 'failed')
        self.plugin.dl_time_spent = time.time() - start
 def is_worker_alive(workers):
    for w in workers:
@ -42,9 +48,209 @@ def is_worker_alive(workers):
            return True
    return False
-def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
+# }}}
 # Merge results from different sources {{{
 class ISBNMerge(object):
    '''
    Combine the metadata results produced by several identify plugins.

    Results that have an ISBN are grouped into pools keyed by a set of
    related ISBNs (as returned by xisbn.get_isbn_pool()); results without
    an ISBN are kept in a separate list. finalize() turns the collected
    results into a single list sorted by average_source_relevance.
    '''

    def __init__(self):
        # frozenset of ISBNs -> (min_year, [results that share one of them])
        self.pools = {}
        # Results that carry no ISBN at all
        self.isbnless_results = []

    def isbn_in_pool(self, isbn):
        ' Return the pool whose ISBN set contains isbn, or None '
        if isbn:
            for isbns, pool in self.pools.iteritems():
                if isbn in isbns:
                    return pool
        return None

    def pool_has_result_from_same_source(self, pool, result):
        ' True iff pool already holds a result from the plugin of result '
        return any(r.identify_plugin is result.identify_plugin
                for r in pool[1])

    def add_result(self, result):
        '''
        File result either into the pool matching its ISBN (creating the
        pool if needed) or into the list of ISBN-less results. Only the
        first result from any one plugin is kept per pool.
        '''
        isbn = result.isbn
        if isbn:
            pool = self.isbn_in_pool(isbn)
            if pool is None:
                isbns, min_year = xisbn.get_isbn_pool(isbn)
                if not isbns:
                    # Nothing known about this ISBN, it forms its own pool
                    isbns = frozenset([isbn])
                self.pools[isbns] = pool = (min_year, [])
            if not self.pool_has_result_from_same_source(pool, result):
                pool[1].append(result)
        else:
            self.isbnless_results.append(result)

    def finalize(self):
        '''
        Build and return self.results from everything passed to
        add_result(). If any pool contains a result only the pooled (ISBN)
        results are used, otherwise the most relevant ISBN-less result of
        each plugin is used.
        '''
        # Every pool is a (min_year, results) tuple; the tuple itself is
        # always true, so the list inside it is what has to be tested.
        has_isbn_result = any(results for min_year, results in
                self.pools.itervalues())
        self.has_isbn_result = has_isbn_result

        if has_isbn_result:
            self.merge_isbn_results()
        else:
            results = sorted(self.isbnless_results,
                    key=attrgetter('relevance_in_source'))
            # Pick only the most relevant result from each source
            self.results = []
            seen = set()
            for result in results:
                if result.identify_plugin not in seen:
                    seen.add(result.identify_plugin)
                    self.results.append(result)
                    result.average_source_relevance = \
                        result.relevance_in_source

        self.merge_metadata_results()

        return self.results

    def merge_metadata_results(self):
        ' Merge results with identical title and authors '
        groups = {}
        for result in self.results:
            title = lower(result.title if result.title else '')
            key = (title, tuple([lower(x) for x in result.authors]))
            groups.setdefault(key, []).append(result)

        # Fewer groups than results means at least one group needs merging
        if len(groups) != len(self.results):
            self.results = []
            for rgroup in groups.itervalues():
                rel = [r.average_source_relevance for r in rgroup]
                if len(rgroup) > 1:
                    result = self.merge(rgroup, None, do_asr=False)
                    result.average_source_relevance = sum(rel)/len(rel)
                else:
                    result = rgroup[0]
                self.results.append(result)

        self.results.sort(key=attrgetter('average_source_relevance'))

    def merge_isbn_results(self):
        ' Set self.results to one merged result per non-empty pool '
        self.results = []
        for min_year, results in self.pools.itervalues():
            if results:
                self.results.append(self.merge(results, min_year))

        self.results.sort(key=attrgetter('average_source_relevance'))

    def length_merge(self, attr, results, null_value=None, shortest=True):
        '''
        Return the shortest (or, with shortest=False, the longest) non-null,
        non-empty value of attr among results, or null_value if there is
        none.
        '''
        values = [getattr(x, attr) for x in results if not x.is_null(attr)]
        values = [x for x in values if len(x) > 0]
        if not values:
            return null_value
        values.sort(key=len, reverse=not shortest)
        return values[0]

    def random_merge(self, attr, results, null_value=None):
        ' Return the first non-null value of attr in results, or null_value '
        values = [getattr(x, attr) for x in results if not x.is_null(attr)]
        return values[0] if values else null_value

    def merge(self, results, min_year, do_asr=True):
        '''
        Merge results into a single new Metadata object and return it.

        :param results: The results to merge
        :param min_year: If true, the publication date is set from this
                         year, otherwise the earliest pubdate found in
                         results is used
        :param do_asr: If True, average_source_relevance is set to the mean
                       relevance_in_source of results
        '''
        ans = Metadata(_('Unknown'))

        # We assume the shortest title has the least cruft in it
        ans.title = self.length_merge('title', results, null_value=ans.title)

        # No harm in having extra authors, maybe something useful like an
        # editor or translator
        ans.authors = self.length_merge('authors', results,
                null_value=ans.authors, shortest=False)

        # We assume the shortest publisher has the least cruft in it
        ans.publisher = self.length_merge('publisher', results,
                null_value=ans.publisher)

        # We assume the smallest set of tags has the least cruft in it
        ans.tags = self.length_merge('tags', results,
                null_value=ans.tags)

        # We assume the longest series has the most info in it
        ans.series = self.length_merge('series', results,
                null_value=ans.series, shortest=False)
        # Take the index from the first result that supplied that series
        for r in results:
            if r.series and r.series == ans.series:
                ans.series_index = r.series_index
                break

        # Average the rating over all sources
        ratings = []
        for r in results:
            rating = r.rating
            if rating and rating > 0 and rating <= 5:
                ratings.append(rating)
        if ratings:
            ans.rating = sum(ratings)/len(ratings)

        # Smallest language is likely to be valid
        ans.language = self.length_merge('language', results,
                null_value=ans.language)

        # Choose longest comments
        ans.comments = self.length_merge('comments', results,
                null_value=ans.comments, shortest=False)

        # Published date
        if min_year:
            min_date = datetime(min_year, 1, 2, tzinfo=utc_tz)
            ans.pubdate = min_date
        else:
            # Year 3001 is a sentinel meaning "no date found yet"
            min_date = datetime(3001, 1, 1, tzinfo=utc_tz)
            for r in results:
                if r.pubdate is not None and r.pubdate < min_date:
                    min_date = r.pubdate
            if min_date.year < 3000:
                ans.pubdate = min_date

        # Identifiers
        for r in results:
            ans.identifiers.update(r.identifiers)

        # Merge any other fields with no special handling (random merge)
        touched_fields = set()
        for r in results:
            if hasattr(r, 'identify_plugin'):
                touched_fields |= r.identify_plugin.touched_fields

        for f in touched_fields:
            # Identifiers were merged above, fields already set are kept
            if f.startswith('identifier:') or not ans.is_null(f):
                continue
            setattr(ans, f, self.random_merge(f, results,
                null_value=getattr(ans, f)))

        if do_asr:
            avg = [x.relevance_in_source for x in results]
            avg = sum(avg)/len(avg)
            ans.average_source_relevance = avg

        return ans
 def merge_identify_results(result_map, log):
    '''
    Add every result found in result_map (a mapping of plugin to its list
    of results) to a new ISBNMerge and return what its finalize() returns.
    The log argument is not used.
    '''
    merger = ISBNMerge()
    for _plugin, plugin_results in result_map.iteritems():
        for found in plugin_results:
            merger.add_result(found)
    merged = merger.finalize()
    return merged
 # }}}
 def identify(log, abort, # {{{
        title=None, authors=None, identifiers={}, timeout=30):
    start_time = time.time()
-    plugins = list(metadata_plugins['identify'])
+    plugins = list(metadata_plugins(['identify']))
    kwargs = {
            'title': title,
@ -56,14 +262,17 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
    log('Running identify query with parameters:')
    log(kwargs)
    log('Using plugins:', ', '.join([p.name for p in plugins]))
-    log('The log (if any) from individual plugins is below')
+    log('The log from individual plugins is below')
    workers = [Worker(p, kwargs, abort) for p in plugins]
    for w in workers:
        w.start()
    first_result_at = None
-    results = dict.fromkeys(plugins, [])
+    results = {}
    for p in plugins:
        results[p] = []
    logs = dict([(w.plugin, w.buf) for w in workers])
    def get_results():
        found = False
@ -77,6 +286,7 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
                found = True
        return found
    wait_time = msprefs['wait_after_first_identify_result']
    while True:
        time.sleep(0.2)
@ -86,76 +296,118 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
        if not is_worker_alive(workers):
            break
-        if (first_result_at is not None and time.time() - first_result_at <
+        if (first_result_at is not None and time.time() - first_result_at >
-                WAIT_AFTER_FIRST_RESULT):
+                wait_time):
            log('Not waiting any longer for more results')
            abort.set()
            break
-    get_results()
+    while not abort.is_set() and get_results():
        pass
    sort_kwargs = dict(kwargs)
    for k in list(sort_kwargs.iterkeys()):
        if k not in ('title', 'authors', 'identifiers'):
            sort_kwargs.pop(k)
-    for plugin, results in results.iteritems():
+    longest, lp = -1, ''
-        results.sort(key=plugin.identify_results_keygen(**sort_kwargs))
+    for plugin, presults in results.iteritems():
-        plog = plugin.buf.getvalue().strip()
+        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
        plog = logs[plugin].getvalue().strip()
        log('\n'+'*'*30, plugin.name, '*'*30)
        log('Request extra headers:', plugin.browser.addheaders)
        log('Found %d results'%len(presults))
        time_spent = getattr(plugin, 'dl_time_spent', None)
        if time_spent is None:
            log('Downloading was aborted')
            longest, lp = -1, plugin.name
        else:
            log('Downloading from', plugin.name, 'took', time_spent)
            if time_spent > longest:
                longest, lp = time_spent, plugin.name
        for r in presults:
            log('\n\n---')
            log(unicode(r))
        if plog:
            log('\n'+'*'*35, plugin.name, '*'*35)
            log('Found %d results'%len(results))
            log(plog)
        log('\n'+'*'*80)
-        for i, result in enumerate(results):
+        for i, result in enumerate(presults):
            result.relevance_in_source = i
            result.has_cached_cover_url = \
                plugin.get_cached_cover_url(result.identifiers) is not None
            result.identify_plugin = plugin
    log('The identify phase took %.2f seconds'%(time.time() - start_time))
    log('The longest time (%f) was taken by:'%longest, lp)
    log('Merging results from different sources and finding earliest',
            'publication dates')
    start_time = time.time()
-    merged_results = merge_identify_results(results, log)
+    results = merge_identify_results(results, log)
    log('We have %d merged results, merging took: %.2f seconds' %
-            (len(merged_results), time.time() - start_time))
+            (len(results), time.time() - start_time))
-class ISBNMerge(object):
+    if msprefs['txt_comments']:
    def __init__(self):
        self.pools = {}
    def isbn_in_pool(self, isbn):
        if isbn:
            for p in self.pools:
                if isbn in p:
                    return p
        return None
    def pool_has_result_from_same_source(self, pool, result):
        results = self.pools[pool][1]
        for r in results:
-            if r.identify_plugin is result.identify_plugin:
+            if r.plugin.has_html_comments and r.comments:
-                return True
+                r.comments = html2text(r.comments)
        return False
-    def add_result(self, result, isbn):
+    dummy = Metadata(_('Unknown'))
-        pool = self.isbn_in_pool(isbn)
+    max_tags = msprefs['max_tags']
-        if pool is None:
+    for r in results:
-            isbns, min_year = xisbn.get_isbn_pool(isbn)
+        for f in msprefs['ignore_fields']:
-            if not isbns:
+            setattr(r, f, getattr(dummy, f))
-                isbns = frozenset([isbn])
+        r.tags = r.tags[:max_tags]
            self.pool[isbns] = pool = (min_year, [])
-        if not self.pool_has_result_from_same_source(pool, result):
+    return results
-            pool[1].append(result)
+# }}}
-def merge_identify_results(result_map, log):
+if __name__ == '__main__': # tests {{{
-    for plugin, results in result_map.iteritems():
+    # To run these tests use: calibre-debug -e
-        for result in results:
+    # src/calibre/ebooks/metadata/sources/identify.py
-            isbn = result.isbn
+    from calibre.ebooks.metadata.sources.test import (test_identify,
-            if isbn:
+            title_test, authors_test)
-                isbns, min_year = xisbn.get_isbn_pool(isbn)
+    tests = [
            ( # An e-book ISBN not on Amazon, one of the authors is
              # unknown to Amazon
                {'identifiers':{'isbn': '9780307459671'},
                    'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
                [title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
                    exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
            ),
            (  # Test absence of identifiers
                {'title':'Learning Python',
                    'authors':['Lutz']},
                [title_test('Learning Python',
                    exact=True), authors_test(['Mark Lutz'])
                 ]
            ),
            ( # Sophisticated comment formatting
                {'identifiers':{'isbn': '9781416580829'}},
                [title_test('Angels & Demons',
                    exact=True), authors_test(['Dan Brown'])]
            ),
            ( # No ISBN
                {'title':'Justine', 'authors':['Durrel']},
                [title_test('Justine', exact=True),
                    authors_test(['Lawrence Durrel'])]
            ),
            (  # A newer book
                {'identifiers':{'isbn': '9780316044981'}},
                [title_test('The Heroes', exact=True),
                    authors_test(['Joe Abercrombie'])]
            ),
        ]
    #test_identify(tests[1:2])
    test_identify(tests)
 # }}}
--- a/src/calibre/ebooks/metadata/sources/isbndb.py
+++ b/src/calibre/ebooks/metadata/sources/isbndb.py
@ -0,0 +1,40 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from calibre.ebooks.metadata.sources.base import Source
 class ISBNDB(Source):
    '''
    Metadata source plugin for isbndb.com. On construction it copies an
    access key stored under the old 'IsbnDB' plugin customization, if
    there is one, into this plugin's own preferences.
    '''

    name = 'ISBNDB'
    description = _('Downloads metadata from isbndb.com')

    capabilities = frozenset(['identify'])
    touched_fields = frozenset(['title', 'authors',
        'identifier:isbn', 'comments', 'publisher'])
    supports_gzip_transfer_encoding = True

    def __init__(self, *args, **kwargs):
        Source.__init__(self, *args, **kwargs)

        prefs = self.prefs
        prefs.defaults['key_migrated'] = False
        prefs.defaults['isbndb_key'] = None

        if not prefs['key_migrated']:
            # Set the flag before trying, so that a failed migration is
            # not attempted again while the flag stays set
            prefs['key_migrated'] = True
            try:
                from calibre.customize.ui import config
                key = config['plugin_customization']['IsbnDB']
                prefs['isbndb_key'] = key
            except Exception:
                # Best effort: no old key (or no readable config) simply
                # leaves isbndb_key at its default. Unlike a bare except,
                # KeyboardInterrupt and SystemExit are not swallowed.
                pass

        self.isbndb_key = prefs['isbndb_key']
--- a/src/calibre/ebooks/metadata/sources/openlibrary.py
+++ b/src/calibre/ebooks/metadata/sources/openlibrary.py
@ -26,7 +26,7 @@ class OpenLibrary(Source):
        br = self.browser
        try:
            ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read()
-            result_queue.put(ans)
+            result_queue.put((self, ans))
        except Exception as e:
            if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                log.error('No cover for ISBN: %r found'%isbn)
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@ -14,7 +14,8 @@ from threading import Event
 from calibre.customize.ui import metadata_plugins
 from calibre import prints, sanitize_file_name2
 from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import create_log
+from calibre.ebooks.metadata.sources.base import (create_log,
        get_cached_cover_urls)
 def isbn_test(isbn):
    isbn_ = check_isbn(isbn)
@ -45,8 +46,80 @@ def authors_test(authors):
    return test
 def init_test(tdir_name):
    '''
    Set up logging for a test run.

    Opens <tempdir>/<tdir_name without spaces>_identify_test.txt for binary
    writing and wraps it with create_log().

    :return: (temp directory, log file path, log object, new Event)
    '''
    tdir = tempfile.gettempdir()
    log_name = tdir_name.replace(' ', '') + '_identify_test.txt'
    lf = os.path.join(tdir, log_name)
    log_stream = open(lf, 'wb')
    log = create_log(log_stream)
    return tdir, lf, log, Event()
-def test_identify_plugin(name, tests):
+def test_identify(tests): # {{{
    '''
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
                  test_funcs). args is a dict of keyword arguments to pass to
                  the identify method. test_funcs are callables that accept a
                  Metadata object and return True iff the object passes the
                  test.
    '''
    from calibre.ebooks.metadata.sources.identify import identify
    tdir, lf, log, abort = init_test('Full Identify')
    prints('Log saved to', lf)
    times = []
    for kwargs, test_funcs in tests:
        log('#'*80)
        log('### Running test with:', kwargs)
        log('#'*80)
        prints('Running test with:', kwargs)
        args = (log, abort)
        start_time = time.time()
        results = identify(*args, **kwargs)
        total_time = time.time() - start_time
        times.append(total_time)
        if not results:
            prints('identify failed to find any results')
            break
        prints('Found', len(results), 'matches:', end=' ')
        prints('Smaller relevance means better match')
        for i, mi in enumerate(results):
            prints('*'*30, 'Relevance:', i, '*'*30)
            prints(mi)
            prints('\nCached cover URLs    :',
                    [x[0].name for x in get_cached_cover_urls(mi)])
            prints('*'*75, '\n\n')
        possibles = []
        for mi in results:
            test_failed = False
            for tfunc in test_funcs:
                if not tfunc(mi):
                    test_failed = True
                    break
            if not test_failed:
                possibles.append(mi)
        if not possibles:
            prints('ERROR: No results that passed all tests were found')
            prints('Log saved to', lf)
            raise SystemExit(1)
        if results[0] is not possibles[0]:
            prints('Most relevant result failed the tests')
            raise SystemExit(1)
        log('\n\n')
    prints('Average time per query', sum(times)/len(times))
    prints('Full log is at:', lf)
 # }}}
 def test_identify_plugin(name, tests): # {{{
    '''
    :param name: Plugin name
    :param tests: List of 2-tuples. Each two tuple is of the form (args,
@ -61,11 +134,9 @@ def test_identify_plugin(name, tests):
            plugin = x
            break
    prints('Testing the identify function of', plugin.name)
    prints('Using extra headers:', plugin.browser.addheaders)
-    tdir = tempfile.gettempdir()
+    tdir, lf, log, abort = init_test(plugin.name)
    lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt')
    log = create_log(open(lf, 'wb'))
    abort = Event()
    prints('Log saved to', lf)
    times = []
@ -159,4 +230,5 @@ def test_identify_plugin(name, tests):
    if os.stat(lf).st_size > 10:
        prints('There were some errors/warnings, see log', lf)
 # }}}
--- a/src/calibre/ebooks/metadata/xisbn.py
+++ b/src/calibre/ebooks/metadata/xisbn.py
@ -73,7 +73,11 @@ class xISBN(object):
    def get_isbn_pool(self, isbn):
        data = self.get_data(isbn)
-        isbns = frozenset([x.get('isbn') for x in data if 'isbn' in x])
+        raw = tuple(x.get('isbn') for x in data if 'isbn' in x)
        isbns = []
        for x in raw:
            isbns += x
        isbns = frozenset(isbns)
        min_year = 100000
        for x in data:
            try:
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@ -282,8 +282,8 @@ class Serializer(object):
                buffer.write('="')
                self.serialize_text(val, quot=True)
                buffer.write('"')
        if elem.text or len(elem) > 0:
        buffer.write('>')
        if elem.text or len(elem) > 0:
            if elem.text:
                self.anchor_offset = None
                self.serialize_text(elem.text)
@ -293,8 +293,6 @@ class Serializer(object):
                    self.anchor_offset = None
                    self.serialize_text(child.tail)
        buffer.write('</%s>' % tag)
        else:
            buffer.write('/>')
    def serialize_text(self, text, quot=False):
        text = text.replace('&', '&amp;')
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -17,6 +17,8 @@ from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
 from cssutils import profile as cssprofiles
 from lxml import etree
 from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
 from calibre import force_unicode
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
 from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
 from calibre.ebooks.oeb.profile import PROFILES
@ -140,8 +142,17 @@ class Stylizer(object):
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in head:
-            if elem.tag == XHTML('style') and elem.text \
+            if (elem.tag == XHTML('style') and
-               and elem.get('type', CSS_MIME) in OEB_STYLES:
+                elem.get('type', CSS_MIME) in OEB_STYLES):
                text = elem.text if elem.text else u''
                for x in elem:
                    t = getattr(x, 'text', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                    t = getattr(x, 'tail', None)
                    if t:
                        text += u'\n\n' + force_unicode(t, u'utf-8')
                if text:
                    text = XHTML_CSS_NAMESPACE + elem.text
                    text = oeb.css_preprocessor(text)
                    stylesheet = parser.parseString(text, href=cssname)
--- a/src/calibre/ebooks/pdf/fonts.cpp
+++ b/src/calibre/ebooks/pdf/fonts.cpp
@ -72,6 +72,7 @@ XMLFont::XMLFont(string* font_name, double size, GfxRGB rgb) :
        size(size-1), line_size(-1.0), italic(false), bold(false), font_name(font_name),
        font_family(NULL), color(rgb)  {
    if (!this->font_name) this->font_name = new string(DEFAULT_FONT_FAMILY);
    this->font_family = family_name(this->font_name);
    if (strcasestr(font_name->c_str(), "bold")) this->bold = true;
@ -134,7 +135,12 @@ Fonts::size_type Fonts::add_font(XMLFont *f) {
 }
 Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) {
-    XMLFont *f = new XMLFont(font_name, size, rgb);
+    XMLFont *f = NULL;
    if (font_name == NULL) 
        font_name = new string("Unknown");
        // font_name must not be deleted
    f = new XMLFont(font_name, size, rgb);
    return this->add_font(f);
 }
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@ -17,6 +17,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.gui2.dialogs.tag_list_editor import TagListEditor
 from calibre.gui2.actions import InterfaceAction
 from calibre.utils.icu import sort_key
 from calibre.utils.config import test_eight_code
 class EditMetadataAction(InterfaceAction):
@ -133,8 +134,6 @@ class EditMetadataAction(InterfaceAction):
        row_list = [r.row() for r in rows]
        current_row = 0
        changed = set([])
        db = self.gui.library_view.model().db
        if len(row_list) == 1:
            cr = row_list[0]
@ -142,6 +141,24 @@ class EditMetadataAction(InterfaceAction):
                list(range(self.gui.library_view.model().rowCount(QModelIndex())))
            current_row = row_list.index(cr)
        if test_eight_code:
            changed = self.do_edit_metadata(row_list, current_row)
        else:
            changed = self.do_edit_metadata_old(row_list, current_row)
        if changed:
            self.gui.library_view.model().refresh_ids(list(changed))
            current = self.gui.library_view.currentIndex()
            m = self.gui.library_view.model()
            if self.gui.cover_flow:
                self.gui.cover_flow.dataChanged()
            m.current_changed(current, previous)
            self.gui.tags_view.recount()
    def do_edit_metadata_old(self, row_list, current_row):
        changed = set([])
        db = self.gui.library_view.model().db
        while True:
            prev = next_ = None
            if current_row > 0:
@ -167,15 +184,28 @@ class EditMetadataAction(InterfaceAction):
            self.gui.library_view.set_current_row(current_row)
            self.gui.library_view.scroll_to_row(current_row)
    def do_edit_metadata(self, row_list, current_row):
        '''
        Run edit_metadata() from calibre.gui2.metadata.single on the
        library database for row_list/current_row, with the GUI as parent
        and this action's callbacks, and return the first element of the
        pair it returns (the changed books).
        '''
        from calibre.gui2.metadata.single import edit_metadata
        library_db = self.gui.library_view.model().db
        outcome = edit_metadata(
            library_db, row_list, current_row,
            parent=self.gui,
            view_slot=self.view_format_callback,
            set_current_callback=self.set_current_callback)
        # The second element (rows to refresh) is not used here
        changed, _rows_to_refresh = outcome
        return changed
    def set_current_callback(self, id_):
        '''
        Make the row that the library database reports for id_ the current
        row of the library view and scroll the view to it.
        '''
        row = self.gui.library_view.model().db.row(id_)
        for move in (self.gui.library_view.set_current_row,
                self.gui.library_view.scroll_to_row):
            move(row)
    def view_format_callback(self, id_, fmt):
        '''
        Ask the 'View' interface action to show fmt. With an id_ the
        action's view_format() is called with the database row of id_,
        with id_ set to None fmt is handed to its _view_file() instead.
        '''
        viewer = self.gui.iactions['View']
        if id_ is not None:
            row = self.gui.library_view.model().db.row(id_)
            viewer.view_format(row, fmt)
        else:
            viewer._view_file(fmt)
        if changed:
            self.gui.library_view.model().refresh_ids(list(changed))
            current = self.gui.library_view.currentIndex()
            m = self.gui.library_view.model()
            if self.gui.cover_flow:
                self.gui.cover_flow.dataChanged()
            m.current_changed(current, previous)
            self.gui.tags_view.recount()
    def edit_bulk_metadata(self, checked):
        '''
--- a/src/calibre/gui2/dialogs/book_info.ui
+++ b/src/calibre/gui2/dialogs/book_info.ui
@ -7,15 +7,25 @@
    <x>0</x>
    <y>0</y>
    <width>917</width>
-    <height>480</height>
+    <height>492</height>
   </rect>
  </property>
  <property name="windowTitle">
   <string>Dialog</string>
  </property>
  <property name="windowIcon">
   <iconset resource="../../../../resources/images.qrc">
    <normaloff>:/images/metadata.png</normaloff>:/images/metadata.png</iconset>
  </property>
  <layout class="QGridLayout" name="gridLayout">
   <item row="0" column="0" colspan="2">
    <widget class="QLabel" name="title">
     <property name="font">
      <font>
       <weight>75</weight>
       <bold>true</bold>
      </font>
     </property>
     <property name="text">
      <string>TextLabel</string>
     </property>
@ -24,10 +34,26 @@
     </property>
    </widget>
   </item>
-   <item row="1" column="0">
+   <item row="1" column="0" rowspan="3">
    <widget class="CoverView" name="cover"/>
   </item>
   <item row="1" column="1">
    <widget class="QScrollArea" name="scrollArea">
     <property name="frameShape">
      <enum>QFrame::NoFrame</enum>
     </property>
     <property name="widgetResizable">
      <bool>true</bool>
     </property>
     <widget class="QWidget" name="scrollAreaWidgetContents">
      <property name="geometry">
       <rect>
        <x>0</x>
        <y>0</y>
        <width>435</width>
        <height>670</height>
       </rect>
      </property>
      <layout class="QVBoxLayout" name="verticalLayout">
       <item>
        <widget class="QLabel" name="text">
@ -72,14 +98,18 @@
         </layout>
        </widget>
       </item>
-     <item>
+      </layout>
     </widget>
    </widget>
   </item>
   <item row="2" column="1">
    <widget class="QCheckBox" name="fit_cover">
     <property name="text">
      <string>Fit &amp;cover within view</string>
     </property>
    </widget>
   </item>
-     <item>
+   <item row="3" column="1">
    <layout class="QHBoxLayout" name="horizontalLayout">
     <item>
      <widget class="QPushButton" name="previous_button">
@ -106,8 +136,6 @@
    </layout>
   </item>
  </layout>
   </item>
  </layout>
 </widget>
 <customwidgets>
  <customwidget>
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@ -7,10 +9,10 @@ __docformat__ = 'restructuredtext en'
 import textwrap, re, os
-from PyQt4.Qt import Qt, QDateEdit, QDate, \
+from PyQt4.Qt import (Qt, QDateEdit, QDate,
-    QIcon, QToolButton, QWidget, QLabel, QGridLayout, \
+    QIcon, QToolButton, QWidget, QLabel, QGridLayout,
-    QDoubleSpinBox, QListWidgetItem, QSize, QPixmap, \
+    QDoubleSpinBox, QListWidgetItem, QSize, QPixmap,
-    QPushButton, QSpinBox, QLineEdit
+    QPushButton, QSpinBox, QLineEdit, QSizePolicy)
 from calibre.gui2.widgets import EnLineEdit, FormatList, ImageView
 from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
@ -22,7 +24,7 @@ from calibre.ebooks.metadata.meta import get_metadata
 from calibre.gui2 import file_icon_provider, UNDEFINED_QDATE, UNDEFINED_DATE, \
        choose_files, error_dialog, choose_images, question_dialog
 from calibre.utils.date import local_tz, qt_to_dt
-from calibre import strftime
+from calibre import strftime, fit_image
 from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.customize.ui import run_plugins_on_import
 from calibre.utils.date import utcfromtimestamp
@ -426,7 +428,7 @@ class Format(QListWidgetItem): # {{{
        if timestamp is not None:
            ts = timestamp.astimezone(local_tz)
            t = strftime('%a, %d %b %Y [%H:%M:%S]', ts.timetuple())
-            text = _('Last modified: %s')%t
+            text = _('Last modified: %s\n\nDouble click to view')%t
            self.setToolTip(text)
            self.setStatusTip(text)
@ -480,6 +482,7 @@ class FormatsManager(QWidget): # {{{
    def initialize(self, db, id_):
        self.changed = False
        self.formats.clear()
        exts = db.formats(id_, index_is_id=True)
        self.original_val = set([])
        if exts:
@ -574,8 +577,7 @@ class FormatsManager(QWidget): # {{{
            self.changed = True
    def show_format(self, item, *args):
-        fmt = item.ext
+        self.dialog.do_view_format(item.path, item.ext)
        self.dialog.view_format.emit(fmt)
    def get_selected_format_metadata(self, db, id_):
        old = prefs['read_file_metadata']
@ -638,6 +640,23 @@ class Cover(ImageView): # {{{
                self.trim_cover_button, self.download_cover_button,
                self.generate_cover_button]
        self.frame_size = (300, 400)
        self.setSizePolicy(QSizePolicy(QSizePolicy.Preferred,
            QSizePolicy.Preferred))
    def frame_resized(self, ev):
        '''
        Update self.frame_size from a resize event: one third of the new
        width (integer division) and the full new height.
        '''
        new_size = ev.size()
        width, height = new_size.width(), new_size.height()
        self.frame_size = (width//3, height)
    def sizeHint(self):
        '''
        Return the ImageView size hint, replaced by the dimensions computed
        by fit_image() when it reports that the hint had to be resized
        against self.frame_size.
        '''
        hint = ImageView.sizeHint(self)
        # fit_image yields a (resized, new_width, new_height) triple
        scaled, new_w, new_h = fit_image(hint.width(), hint.height(),
                self.frame_size[0], self.frame_size[1])
        return QSize(new_w, new_h) if scaled else hint
    def select_cover(self, *args):
        files = choose_images(self, 'change cover dialog',
                             _('Choose cover for ') +
@ -882,8 +901,11 @@ class TagsEdit(MultiCompleteLineEdit): # {{{
 # }}}
-class ISBNEdit(QLineEdit): # {{{
+class IdentifiersEdit(QLineEdit): # {{{
-    LABEL = _('IS&BN:')
+    LABEL = _('I&ds:')
    BASE_TT = _('Edit the identifiers for this book. '
            'For example: \n\n%s')%(
            'isbn:1565927249, doi:10.1000/182, amazon:1565927249')
    def __init__(self, parent):
        QLineEdit.__init__(self, parent)
@ -893,32 +915,44 @@ class ISBNEdit(QLineEdit): # {{{
    @dynamic_property
    def current_val(self):
        def fget(self):
-            return self.pat.sub('', unicode(self.text()).strip())
+            raw = unicode(self.text()).strip()
            parts = [x.strip() for x in raw.split(',')]
            ans = {}
            for x in parts:
                c = x.split(':')
                if len(c) == 2:
                    ans[c[0]] = c[1]
            return ans
        def fset(self, val):
            if not val:
-                val = ''
+                val = {}
-            self.setText(val.strip())
+            txt = ', '.join(['%s:%s'%(k, v) for k, v in val.iteritems()])
            self.setText(txt.strip())
        return property(fget=fget, fset=fset)
    def initialize(self, db, id_):
-        self.current_val = db.isbn(id_, index_is_id=True)
+        self.current_val = db.get_identifiers(id_, index_is_id=True)
        self.original_val = self.current_val
    def commit(self, db, id_):
-        db.set_isbn(id_, self.current_val, notify=False, commit=False)
+        if self.original_val != self.current_val:
            db.set_identifiers(id_, self.current_val, notify=False, commit=False)
        return True
    def validate(self, *args):
-        isbn = self.current_val
+        identifiers = self.current_val
-        tt = _('This ISBN number is valid')
+        isbn = identifiers.get('isbn', '')
        tt = self.BASE_TT
        extra = ''
        if not isbn:
            col = 'rgba(0,255,0,0%)'
        elif check_isbn(isbn) is not None:
            col = 'rgba(0,255,0,20%)'
            extra = '\n\n'+_('This ISBN number is valid')
        else:
            col = 'rgba(255,0,0,20%)'
-            tt = _('This ISBN number is invalid')
+            extra = '\n\n' + _('This ISBN number is invalid')
-        self.setToolTip(tt)
+        self.setToolTip(tt+extra)
        self.setStyleSheet('QLineEdit { background-color: %s }'%col)
 # }}}
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@ -8,31 +10,31 @@ __docformat__ = 'restructuredtext en'
 import os
 from functools import partial
-from PyQt4.Qt import Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton, \
+from PyQt4.Qt import (Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton,
-        QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont, \
+        QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont,
-        QTabWidget, QIcon, QToolButton, QSplitter, QGroupBox, QSpacerItem, \
+        QTabWidget, QIcon, QToolButton, QSplitter, QGroupBox, QSpacerItem,
-        QSizePolicy, QPalette, QFrame, QSize, QKeySequence
+        QSizePolicy, QPalette, QFrame, QSize, QKeySequence)
 from calibre.ebooks.metadata import authors_to_string, string_to_authors
 from calibre.gui2 import ResizableDialog, error_dialog, gprefs
-from calibre.gui2.metadata.basic_widgets import TitleEdit, AuthorsEdit, \
+from calibre.gui2.metadata.basic_widgets import (TitleEdit, AuthorsEdit,
-    AuthorSortEdit, TitleSortEdit, SeriesEdit, SeriesIndexEdit, ISBNEdit, \
+    AuthorSortEdit, TitleSortEdit, SeriesEdit, SeriesIndexEdit, IdentifiersEdit,
-    RatingEdit, PublisherEdit, TagsEdit, FormatsManager, Cover, CommentsEdit, \
+    RatingEdit, PublisherEdit, TagsEdit, FormatsManager, Cover, CommentsEdit,
-    BuddyLabel, DateEdit, PubdateEdit
+    BuddyLabel, DateEdit, PubdateEdit)
 from calibre.gui2.custom_column_widgets import populate_metadata_page
 from calibre.utils.config import tweaks
 class MetadataSingleDialogBase(ResizableDialog):
-    view_format = pyqtSignal(object)
+    view_format = pyqtSignal(object, object)
    cc_two_column = tweaks['metadata_single_use_2_cols_for_custom_fields']
    one_line_comments_toolbar = False
    def __init__(self, db, parent=None):
        self.db = db
-        self.changed = set([])
+        self.changed = set()
-        self.books_to_refresh = set([])
+        self.books_to_refresh = set()
-        self.rows_to_refresh = set([])
+        self.rows_to_refresh = set()
        ResizableDialog.__init__(self, parent)
    def setupUi(self, *args): # {{{
@ -145,8 +147,8 @@ class MetadataSingleDialogBase(ResizableDialog):
        self.tags_editor_button.clicked.connect(self.tags_editor)
        self.basic_metadata_widgets.append(self.tags)
-        self.isbn = ISBNEdit(self)
+        self.identifiers = IdentifiersEdit(self)
-        self.basic_metadata_widgets.append(self.isbn)
+        self.basic_metadata_widgets.append(self.identifiers)
        self.publisher = PublisherEdit(self)
        self.basic_metadata_widgets.append(self.publisher)
@ -192,6 +194,13 @@ class MetadataSingleDialogBase(ResizableDialog):
                pass # Do something
    # }}}
    def do_view_format(self, path, fmt):
        '''
        Emit the view_format signal.

        With a truthy path the signal carries (None, path); otherwise it
        carries (self.book_id, fmt).
        '''
        emit = self.view_format.emit
        if not path:
            emit(self.book_id, fmt)
            return
        emit(None, path)
    def do_layout(self):
        raise NotImplementedError()
@ -202,6 +211,8 @@ class MetadataSingleDialogBase(ResizableDialog):
            widget.initialize(self.db, id_)
        for widget in getattr(self, 'custom_metadata_widgets', []):
            widget.initialize(id_)
        if callable(self.set_current_callback):
            self.set_current_callback(id_)
        # Commented out as it doesn't play nice with Next, Prev buttons
        #self.fetch_metadata_button.setFocus(Qt.OtherFocusReason)
@ -280,8 +291,8 @@ class MetadataSingleDialogBase(ResizableDialog):
            self.publisher.current_val = mi.publisher
        if not mi.is_null('tags'):
            self.tags.current_val = mi.tags
-        if not mi.is_null('isbn'):
+        if not mi.is_null('identifiers'):
-            self.isbn.current_val = mi.isbn
+            self.identifiers.current_val = mi.identifiers
        if not mi.is_null('pubdate'):
            self.pubdate.current_val = mi.pubdate
        if not mi.is_null('series') and mi.series.strip():
@ -337,11 +348,13 @@ class MetadataSingleDialogBase(ResizableDialog):
        gprefs['metasingle_window_geometry3'] = bytearray(self.saveGeometry())
    # Dialog use methods {{{
-    def start(self, row_list, current_row, view_slot=None):
+    def start(self, row_list, current_row, view_slot=None,
            set_current_callback=None):
        self.row_list = row_list
        self.current_row = current_row
        if view_slot is not None:
            self.view_format.connect(view_slot)
        self.set_current_callback = set_current_callback
        self.do_one(apply_changes=False)
        ret = self.exec_()
        self.break_cycles()
@ -373,6 +386,7 @@ class MetadataSingleDialogBase(ResizableDialog):
    def break_cycles(self):
        # Break any reference cycles that could prevent python
        # from garbage collecting this dialog
        self.set_current_callback = self.db = None
        def disconnect(signal):
            try:
                signal.disconnect()
@ -385,6 +399,14 @@ class MetadataSingleDialogBase(ResizableDialog):
                disconnect(x.clicked)
    # }}}
 class Splitter(QSplitter):
    '''
    QSplitter that announces every resize event through the
    frame_resized signal before handling it normally.
    '''
    # Carries the resize event object passed to resizeEvent()
    frame_resized = pyqtSignal(object)
    def resizeEvent(self, ev):
        # Notify listeners first, then defer to the stock QSplitter handling
        self.frame_resized.emit(ev)
        result = QSplitter.resizeEvent(self, ev)
        return result
 class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
    def do_layout(self):
@ -437,8 +459,9 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
        tl.addWidget(self.formats_manager, 0, 6, 3, 1)
-        self.splitter = QSplitter(Qt.Horizontal, self)
+        self.splitter = Splitter(Qt.Horizontal, self)
        self.splitter.addWidget(self.cover)
        self.splitter.frame_resized.connect(self.cover.frame_resized)
        l.addWidget(self.splitter)
        self.tabs[0].gb = gb = QGroupBox(_('Change cover'), self)
        gb.l = l = QGridLayout()
@ -475,9 +498,9 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
        create_row2(1, self.rating)
        sto(self.rating, self.tags)
        create_row2(2, self.tags, self.tags_editor_button)
-        sto(self.tags_editor_button, self.isbn)
+        sto(self.tags_editor_button, self.identifiers)
-        create_row2(3, self.isbn)
+        create_row2(3, self.identifiers)
-        sto(self.isbn, self.timestamp)
+        sto(self.identifiers, self.timestamp)
        create_row2(4, self.timestamp, self.timestamp.clear_button)
        sto(self.timestamp.clear_button, self.pubdate)
        create_row2(5, self.pubdate, self.pubdate.clear_button)
@ -562,9 +585,9 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{
        create_row(8, self.pubdate, self.publisher,
                   button=self.pubdate.clear_button, icon='trash.png')
        create_row(9, self.publisher, self.timestamp)
-        create_row(10, self.timestamp, self.isbn,
+        create_row(10, self.timestamp, self.identifiers,
                   button=self.timestamp.clear_button, icon='trash.png')
-        create_row(11, self.isbn, self.comments)
+        create_row(11, self.identifiers, self.comments)
        tl.addItem(QSpacerItem(1, 1, QSizePolicy.Fixed, QSizePolicy.Expanding),
                   12, 1, 1 ,1)
@ -580,7 +603,7 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{
            sr.setWidget(w)
            gbl.addWidget(sr)
            self.tabs[0].l.addWidget(gb, 0, 1, 1, 1)
-            sto(self.isbn, gb)
+            sto(self.identifiers, gb)
        w = QGroupBox(_('&Comments'), tab0)
        sp = QSizePolicy()
@ -632,9 +655,11 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{
 # }}}
-def edit_metadata(db, row_list, current_row, parent=None, view_slot=None):
+def edit_metadata(db, row_list, current_row, parent=None, view_slot=None,
        set_current_callback=None):
    d = MetadataSingleDialog(db, parent)
-    d.start(row_list, current_row, view_slot=view_slot)
+    d.start(row_list, current_row, view_slot=view_slot,
            set_current_callback=set_current_callback)
    return d.changed, d.rows_to_refresh
 if __name__ == '__main__':
--- a/src/calibre/gui2/metadata/single_download.py
+++ b/src/calibre/gui2/metadata/single_download.py
@ -0,0 +1,39 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF,
        QStyle, QApplication)
 class RichTextDelegate(QStyledItemDelegate): # {{{
    '''
    Item delegate that treats the data of each index as HTML and draws it
    through a QTextDocument.
    '''
    def __init__(self, parent=None):
        QStyledItemDelegate.__init__(self, parent)
    def to_doc(self, index):
        ''' Build a QTextDocument from the HTML stored at index. '''
        document = QTextDocument()
        html = index.data().toString()
        document.setHtml(html)
        return document
    def sizeHint(self, option, index):
        ''' Size of the rendered document, with 10 added to the height. '''
        size = self.to_doc(index).size().toSize()
        size.setHeight(size.height()+10)
        return size
    def paint(self, painter, option, index):
        '''
        Draw the item background (style control when available, otherwise
        a highlight fill for selected items) and then the document contents,
        clipped to the item rectangle.
        '''
        painter.save()
        area = option.rect
        painter.setClipRect(QRectF(area))
        has_item_control = hasattr(QStyle, 'CE_ItemViewItem')
        if has_item_control:
            QApplication.style().drawControl(QStyle.CE_ItemViewItem, option, painter)
        elif option.state & QStyle.State_Selected:
            painter.fillRect(area, option.palette.highlight())
        # Document coordinates start at the top left corner of the item
        painter.translate(area.topLeft())
        self.to_doc(index).drawContents(painter)
        painter.restore()
 # }}}
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@ -985,6 +985,7 @@ class TagsModel(QAbstractItemModel): # {{{
    def do_drop_from_library(self, md, action, row, column, parent):
        idx = parent
        if idx.isValid():
            self.tags_view.setCurrentIndex(idx)
            node = self.data(idx, Qt.UserRole)
            if node.type == TagTreeItem.TAG:
                fm = self.db.metadata_for_field(node.tag.category)
--- a/src/calibre/gui2/widgets.py
+++ b/src/calibre/gui2/widgets.py
@ -312,6 +312,7 @@ class ImageView(QWidget, ImageDropMixin):
        p.setPen(pen)
        if self.draw_border:
            p.drawRect(target)
        #p.drawRect(self.rect())
        p.end()
 class CoverView(QGraphicsView, ImageDropMixin):
--- a/src/calibre/library/server/base.py
+++ b/src/calibre/library/server/base.py
@ -24,6 +24,8 @@ from calibre.library.server.xml import XMLServer
 from calibre.library.server.opds import OPDSServer
 from calibre.library.server.cache import Cache
 from calibre.library.server.browse import BrowseServer
 from calibre.utils.search_query_parser import saved_searches
 from calibre import prints
 class DispatchController(object): # {{{
@ -178,6 +180,11 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
    def set_search_restriction(self, restriction):
        '''
        Record the name of the search restriction and derive the search
        expression from it.

        self.search_restriction becomes 'search:"<name>"' when restriction
        is non-empty and is one of saved_searches().names(). Otherwise it
        becomes the empty string, and a warning is printed if a non-empty
        name was given that is not among the saved searches.
        '''
        self.search_restriction_name = restriction
        expression = ''
        if restriction:
            if restriction in saved_searches().names():
                expression = 'search:"%s"'%restriction
            else:
                prints('WARNING: Content server: search restriction ',
                       restriction, ' does not exist')
        self.search_restriction = expression
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@ -30,7 +30,7 @@ entry_points = {
             'calibre-customize  = calibre.customize.ui:main',
             'calibre-complete   = calibre.utils.complete:main',
             'pdfmanipulate      = calibre.ebooks.pdf.manipulate.cli:main',
-             'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
+             'fetch-ebook-metadata = calibre.ebooks.metadata.sources.cli:main',
             'epub-fix           = calibre.ebooks.epub.fix.main:main',
             'calibre-smtp = calibre.utils.smtp:main',
        ],
@ -183,7 +183,7 @@ class PostInstall:
            from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop
            from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop
            from calibre.gui2.viewer.main import option_parser as viewer_op
-            from calibre.ebooks.metadata.fetch import option_parser as fem_op
+            from calibre.ebooks.metadata.sources.cli import option_parser as fem_op
            from calibre.gui2.main import option_parser as guiop
            from calibre.utils.smtp import option_parser as smtp_op
            from calibre.library.server.main import option_parser as serv_op
--- a/src/calibre/manual/conf.py
+++ b/src/calibre/manual/conf.py
@ -126,7 +126,7 @@ html_use_modindex = False
 html_use_index = False
 # If true, the reST sources are included in the HTML build as _sources/<name>.
-html_copy_source = False
+html_copy_source = True
 # Output file base name for HTML help builder.
 htmlhelp_basename = 'calibredoc'
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@ -99,7 +99,8 @@ We just need some information from you:
    device.
 Once you send us the output for a particular operating system, support for the device in that operating system
-will appear in the next release of |app|.
+will appear in the next release of |app|. To send us the output, open a bug report and attach the output to it.
 See `calibre bugs <http://calibre-ebook.com/bugs>`_.
 My device is not being detected by |app|?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--- a/src/calibre/manual/gui.rst
+++ b/src/calibre/manual/gui.rst
@ -71,7 +71,7 @@ Edit metadata
 |emii| The :guilabel:`Edit metadata` action has seven variations, which can be accessed by clicking the down arrow on the right side of the button.
-    1. **Edit metadata individually**: This allows you to edit the metadata of books one-by-one, with the option of fetching metadata, including covers from the internet. It also allows you to add/remove particular ebook formats from a book. For more detail see :ref:`metadata`.
+    1. **Edit metadata individually**: This allows you to edit the metadata of books one-by-one, with the option of fetching metadata, including covers from the internet. It also allows you to add/remove particular ebook formats from a book. 
    2. **Edit metadata in bulk**: This allows you to edit common metadata fields for large numbers of books simultaneously. It operates on all the books you have selected in the :ref:`Library view <search_sort>`.
    3. **Download metadata and covers**: Downloads metadata and covers (if available), for the books that are selected in the book list.
    4. **Download only metadata**: Downloads only metadata (if available), for the books that are selected in the book list.
@ -79,6 +79,7 @@ Edit metadata
    6. **Download only social metadata**: Downloads only social metadata such as tags and reviews (if available), for the books that are selected in the book list.
    7. **Merge Book Records**: Gives you the capability of merging the metadata and formats of two or more book records together. You can choose to either delete or keep the records that were not clicked first.
 For more details see :ref:`metadata`.
 .. _convert_ebooks:
--- a/src/calibre/manual/index.rst
+++ b/src/calibre/manual/index.rst
@ -70,7 +70,7 @@ Customizing |app|'s e-book conversion
 .. toctree::
   :maxdepth: 2
-   viewer
+   conversion
 Editing e-book metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -78,7 +78,7 @@ Editing e-book metadata
 .. toctree::
   :maxdepth: 2
-   viewer
+   metadata
 Frequently Asked Questions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
--- a/src/calibre/utils/browser.py
+++ b/src/calibre/utils/browser.py
@ -38,10 +38,10 @@ class Browser(B):
        self._clone_actions['set_handle_equiv'] = ('set_handle_equiv',
                args, kwargs)
-    def set_handle_gzip(self, *args, **kwargs):
+    def set_handle_gzip(self, handle):
-        B.set_handle_gzip(self, *args, **kwargs)
+        B._set_handler(self, '_gzip', handle)
        self._clone_actions['set_handle_gzip'] = ('set_handle_gzip',
-                args, kwargs)
+                (handle,), {})
    def set_debug_redirect(self, *args, **kwargs):
        B.set_debug_redirect(self, *args, **kwargs)
--- a/src/calibre/utils/config.py
+++ b/src/calibre/utils/config.py
@ -784,6 +784,7 @@ def write_tweaks(raw):
 tweaks = read_tweaks()
 test_eight_code = tweaks.get('test_eight_code', False)
 def migrate():
    if hasattr(os, 'geteuid') and os.geteuid() == 0:
--- a/src/calibre/utils/magick/draw.py
+++ b/src/calibre/utils/magick/draw.py
@ -92,12 +92,12 @@ def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None,
    ret = None
    if return_data:
        ret = data
-        if changed:
+        if changed or isinstance(ret, Image):
            if hasattr(img, 'set_compression_quality') and fmt == 'jpg':
                img.set_compression_quality(compression_quality)
            ret = img.export(fmt)
    else:
-        if changed:
+        if changed or isinstance(ret, Image):
            if hasattr(img, 'set_compression_quality') and fmt == 'jpg':
                img.set_compression_quality(compression_quality)
            img.save(path)
--- a/src/odf/odf2xhtml.py
+++ b/src/odf/odf2xhtml.py
@ -1386,12 +1386,19 @@ ol, ul { padding-left: 2em; }
        self.purgedata()
    def s_text_s(self, tag, attrs):
-        """ Generate a number of spaces. ODF has an element; HTML uses &nbsp;
+        # Changed by Kovid to fix non breaking spaces being prepended to
-            We use &#160; so we can send the output through an XML parser if we desire to
+        # element instead of being part of the text flow.
        # We don't use an entity for the nbsp as the contents of self.data will
        # be escaped on writeout.
        """ Generate a number of spaces. We use the non breaking space for
        the text:s ODF element.
        """
-        c = attrs.get( (TEXTNS,'c'),"1")
+        try:
-        for x in xrange(int(c)):
+            c = int(attrs.get((TEXTNS, 'c'), 1))
-            self.writeout('&#160;')
+        except:
            c = 0
        if c > 0:
            self.data.append(u'\u00a0'*c)
    def s_text_span(self, tag, attrs):
        """ The <text:span> element matches the <span> element in HTML. It is