From 6be7471d2e7d93793de6e25e7e9222cb82b49cc4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 08:02:28 -0600
Subject: [PATCH 01/57] F-Secure by louhike and more work on the new metadata
 download system

---
 recipes/f_secure.recipe                       |  22 +++
 src/calibre/ebooks/metadata/sources/amazon.py |   1 +
 src/calibre/ebooks/metadata/sources/base.py   |   6 +
 .../ebooks/metadata/sources/identify.py       | 172 ++++++++++++++++--
 4 files changed, 186 insertions(+), 15 deletions(-)
 create mode 100644 recipes/f_secure.recipe

diff --git a/recipes/f_secure.recipe b/recipes/f_secure.recipe
new file mode 100644
index 0000000000..f276a4961a
--- /dev/null
+++ b/recipes/f_secure.recipe
@@ -0,0 +1,22 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AdvancedUserRecipe1301860159(BasicNewsRecipe):
+    # Single-feed recipe for the F-Secure weblog; articles come from the RSS feed below
+    title          = u'F-Secure Weblog'
+    __author__ = 'louhike'
+    description = u'All the news from the weblog of F-Secure'
+    publisher = u'F-Secure'
+    timefmt = ' [%a, %d %b, %Y]'
+    encoding = 'ISO-8859-1'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    use_embedded_content   = False
+    language = 'en' # assigned exactly once; a later duplicate would silently override it
+    remove_javascript = True
+    keep_only_tags = [dict(name='div', attrs={'class':'modSectionTd2'})]
+    remove_tags = [dict(name='a'),dict(name='hr')]
+
+    feeds          = [(u'Weblog', u'http://www.f-secure.com/weblog/weblog.rss')]
+    def get_cover_url(self):
+        return 'http://www.f-secure.com/weblog/archives/images/company_logo.png'
diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index cfa2b09ea8..9334d818ec 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -282,6 +282,7 @@ class Amazon(Source):
     capabilities = frozenset(['identify', 'cover'])
     touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
         'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
+    has_html_comments = True
 
     AMAZON_DOMAINS = {
             'com': _('US'),
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 7cc4ed3518..08012c3ee8 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -18,6 +18,9 @@ from calibre.utils.titlecase import titlecase
 from calibre.ebooks.metadata import check_isbn
 
 msprefs = JSONConfig('metadata_sources.json')
+msprefs.defaults['txt_comments'] = False
+msprefs.defaults['ignore_fields'] = []
+msprefs.defaults['max_tags'] = 10
 
 def create_log(ostream=None):
     log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
@@ -104,6 +107,9 @@ class Source(Plugin):
     #: during the identify phase
     touched_fields = frozenset()
 
+    #: Set this to True if your plugin return HTML formatted comments
+    has_html_comments = False
+
     def __init__(self, *args, **kwargs):
         Plugin.__init__(self, *args, **kwargs)
         self._isbn_to_identifier_cache = {}
diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index 1d4d8840e8..ab86e8ffa2 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -8,13 +8,18 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
 import time
+from datetime import datetime
 from Queue import Queue, Empty
 from threading import Thread
 from io import BytesIO
+from operator import attrgetter
 
 from calibre.customize.ui import metadata_plugins
-from calibre.ebooks.metadata.sources.base import create_log
+from calibre.ebooks.metadata.sources.base import create_log, msprefs
 from calibre.ebooks.metadata.xisbn import xisbn
+from calibre.ebooks.metadata.book.base import Metadata
+from calibre.utils.date import utc_tz
+from calibre.utils.html2text import html2text
 
 # How long to wait for more results after first result is found
 WAIT_AFTER_FIRST_RESULT = 30 # seconds
@@ -117,14 +122,30 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
     log('Merging results from different sources and finding earliest',
             'publication dates')
     start_time = time.time()
-    merged_results = merge_identify_results(results, log)
+    results = merge_identify_results(results, log)
     log('We have %d merged results, merging took: %.2f seconds' %
-            (len(merged_results), time.time() - start_time))
+            (len(results), time.time() - start_time))
+
+    if msprefs['txt_comments']:
+        for r in results:
+            if r.plugin.has_html_comments and r.comments:
+                r.comments = html2text(r.comments)
+
+    dummy = Metadata(_('Unknown'))
+    max_tags = msprefs['max_tags']
+    for f in msprefs['ignore_fields']:
+        for r in results:
+            setattr(r, f, getattr(dummy, f))
+            r.tags = r.tags[:max_tags]
+
+    return results
+
 
 class ISBNMerge(object):
 
     def __init__(self):
         self.pools = {}
+        self.isbnless_results = []
 
     def isbn_in_pool(self, isbn):
         if isbn:
@@ -140,22 +161,143 @@ class ISBNMerge(object):
                 return True
         return False
 
-    def add_result(self, result, isbn):
-        pool = self.isbn_in_pool(isbn)
-        if pool is None:
-            isbns, min_year = xisbn.get_isbn_pool(isbn)
-            if not isbns:
-                isbns = frozenset([isbn])
-            self.pool[isbns] = pool = (min_year, [])
+    def add_result(self, result):
+        isbn = result.isbn
+        if isbn:
+            pool = self.isbn_in_pool(isbn)
+            if pool is None:
+                isbns, min_year = xisbn.get_isbn_pool(isbn)
+                if not isbns:
+                    isbns = frozenset([isbn])
+                self.pool[isbns] = pool = (min_year, [])
+
+            if not self.pool_has_result_from_same_source(pool, result):
+                pool[1].append(result)
+        else:
+            self.isbnless_results.append(result)
+
+    def finalize(self):
+        has_isbn_result = False
+        for results in self.pools.itervalues():
+            if results:
+                has_isbn_result = True
+                break
+        self.has_isbn_result = has_isbn_result
+
+        if has_isbn_result:
+            self.merge_isbn_results()
+        else:
+            self.results = sorted(self.isbnless_results,
+                    key=attrgetter('relevance_in_source'))
+
+        return self.results
+
+    def merge_isbn_results(self):
+        self.results = []
+        for min_year, results in self.pool.itervalues():
+            if results:
+                self.results.append(self.merge(results, min_year))
+
+        self.results.sort(key=attrgetter('average_source_relevance'))
+
+    def length_merge(self, attr, results, null_value=None, shortest=True):
+        values = [getattr(x, attr) for x in results if not x.is_null(attr)]
+        values = [x for x in values if len(x) > 0]
+        if not values:
+            return null_value
+        values.sort(key=len, reverse=not shortest)
+        return values[0]
+
+    def random_merge(self, attr, results, null_value=None):
+        values = [getattr(x, attr) for x in results if not x.is_null(attr)]
+        return values[0] if values else null_value
+
+    def merge(self, results, min_year):
+        ans = Metadata(_('Unknown'))
+
+        # We assume the shortest title has the least cruft in it
+        ans.title = self.length_merge('title', results, null_value=ans.title)
+
+        # No harm in having extra authors, maybe something useful like an
+        # editor or translator
+        ans.authors = self.length_merge('authors', results,
+                null_value=ans.authors, shortest=False)
+
+        # We assume the shortest publisher has the least cruft in it
+        ans.publisher = self.length_merge('publisher', results,
+                null_value=ans.publisher)
+
+        # We assume the smallest set of tags has the least cruft in it
+        ans.tags = self.length_merge('tags', results,
+                null_value=ans.tags)
+
+        # We assume the longest series has the most info in it
+        ans.series = self.length_merge('series', results,
+                null_value=ans.series, shortest=False)
+        for r in results:
+            if r.series and r.series == ans.series:
+                ans.series_index = r.series_index
+                break
+
+        # Average the rating over all sources
+        ratings = []
+        for r in results:
+            rating = r.rating
+            if rating and rating > 0 and rating <= 5:
+                ratings.append(rating)
+        if ratings:
+            ans.rating = sum(ratings)/len(ratings)
+
+        # Smallest language is likely to be valid
+        ans.language = self.length_merge('language', results,
+                null_value=ans.language)
+
+        # Choose longest comments
+        ans.comments = self.length_merge('comments', results,
+                null_value=ans.comments, shortest=False)
+
+        # Published date
+        if min_year:
+            min_date = datetime(min_year, 1, 2, tzinfo=utc_tz)
+            ans.pubdate = min_date
+        else:
+            min_date = datetime(10000, 1, 1, tzinfo=utc_tz)
+            for r in results:
+                if r.pubdate is not None and r.pubdate < min_date:
+                    min_date = r.pubdate
+            if min_date.year < 10000:
+                ans.pubdate = min_date
+
+        # Identifiers
+        for r in results:
+            ans.identifiers.update(r.identifiers)
+
+        # Merge any other fields with no special handling (random merge)
+        touched_fields = set()
+        for r in results:
+            touched_fields |= r.plugin.touched_fields
+
+        for f in touched_fields:
+            if f.startswith('identifier:') or not ans.is_null(f):
+                continue
+            setattr(ans, f, self.random_merge(f, results,
+                null_value=getattr(ans, f)))
+
+        avg = [x.relevance_in_source for x in results]
+        avg = sum(avg)/len(avg)
+        ans.average_source_relevance = avg
+
+        return ans
 
-        if not self.pool_has_result_from_same_source(pool, result):
-            pool[1].append(result)
 
 def merge_identify_results(result_map, log):
+    isbn_merge = ISBNMerge()
     for plugin, results in result_map.iteritems():
         for result in results:
-            isbn = result.isbn
-            if isbn:
-                isbns, min_year = xisbn.get_isbn_pool(isbn)
+            isbn_merge.add_result(result)
+
+    return isbn_merge.finalize()
+
+
 
 

From 7599a89c472d92cd29afdbf33f7c6faa7526211c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 09:32:32 -0600
Subject: [PATCH 02/57] Fix #750336 (Pocketbook 602/902 2.0.6 FW won't connect)

---
 src/calibre/devices/eb600/driver.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py
index 5374c6c4e2..01277980db 100644
--- a/src/calibre/devices/eb600/driver.py
+++ b/src/calibre/devices/eb600/driver.py
@@ -244,7 +244,8 @@ class POCKETBOOK602(USBMS):
     BCD         = [0x0324]
 
     VENDOR_NAME = ''
-    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902', 'PB903']
+    WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902',
+            'PB903', 'PB']
 
 class POCKETBOOK701(USBMS):
 

From 4b7bc8ce365d99a87ce03cd614b3e8e3f5fceb62 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 10:04:51 -0600
Subject: [PATCH 03/57] Fix #750288 (TimesofIndia news fetch not working)

---
 recipes/toi.recipe | 29 +++++++++++++++++++++++++----
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/recipes/toi.recipe b/recipes/toi.recipe
index 643d120a36..8a772b6f9d 100644
--- a/recipes/toi.recipe
+++ b/recipes/toi.recipe
@@ -1,3 +1,4 @@
+import re
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class TimesOfIndia(BasicNewsRecipe):
@@ -8,10 +9,10 @@ class TimesOfIndia(BasicNewsRecipe):
     max_articles_per_feed = 25
 
     no_stylesheets = True
-    keep_only_tags = [dict(attrs={'class':'maintable12'})]
+    keep_only_tags = [{'class':['maintable12', 'prttabl']}]
     remove_tags = [
             dict(style=lambda x: x and 'float' in x),
-            dict(attrs={'class':'prvnxtbg'}),
+            {'class':['prvnxtbg', 'footbdrin', 'bcclftr']},
     ]
 
     feeds          = [
@@ -38,8 +39,28 @@ class TimesOfIndia(BasicNewsRecipe):
 ('Most Read',
  'http://timesofindia.indiatimes.com/rssfeedmostread.cms')
 ]
-    def print_version(self, url):
-        return url + '?prtpage=1'
+
+    def get_article_url(self, article):
+        # Map a feed link to the printer-friendly articleshow page when an id is found
+        url = BasicNewsRecipe.get_article_url(self, article)
+        if not url:
+            return url  # assumes the base class may yield no link; nothing to rewrite then
+        if '/0Ltimesofindia' in url:
+            # Decode the escaped form of the link (0B -> . 0N -> .com 0C -> / 0E -> -)
+            url = url.partition('/0L')[-1]
+            url = url.replace('0B', '.').replace('0N', '.com').replace('0C',
+                    '/').replace('0E', '-')
+            url = 'http://' + url.rpartition('/')[0]
+            match = re.search(r'/([0-9a-zA-Z]+?)\.cms', url)
+            num = re.sub(r'[^0-9]', '', match.group(1)) if match else ''
+        else:
+            match = re.search(r'/(\d+)\.cms', url)
+            num = match.group(1) if match else ''
+        if num:  # only build the print URL when a numeric article id was extracted
+            return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' %
+                num)
+        return url
+
 
     def preprocess_html(self, soup):
         return soup

From 7d1c706835bbc17990596804e232233272fc5796 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 10:41:43 -0600
Subject: [PATCH 04/57] Fix #750101 (Private bug)

---
 src/calibre/ebooks/pdf/fonts.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/pdf/fonts.cpp b/src/calibre/ebooks/pdf/fonts.cpp
index 99ab7517c1..9b9e7708a3 100644
--- a/src/calibre/ebooks/pdf/fonts.cpp
+++ b/src/calibre/ebooks/pdf/fonts.cpp
@@ -72,6 +72,7 @@ XMLFont::XMLFont(string* font_name, double size, GfxRGB rgb) :
         size(size-1), line_size(-1.0), italic(false), bold(false), font_name(font_name),
         font_family(NULL), color(rgb)  {
 
+
     if (!this->font_name) this->font_name = new string(DEFAULT_FONT_FAMILY);
     this->font_family = family_name(this->font_name);
     if (strcasestr(font_name->c_str(), "bold")) this->bold = true;
@@ -134,7 +135,15 @@ Fonts::size_type Fonts::add_font(XMLFont *f) {
 }
 
 Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) {
-    XMLFont *f = new XMLFont(font_name, size, rgb);
+    XMLFont *f = NULL;
+    if (font_name == NULL) {
+        string *fn = new string("Unknown");
+        f = new XMLFont(fn, size, rgb);
+        // fn must not be deleted
+    } else {
+        f = new XMLFont(font_name, size, rgb);
+    }
+
     return this->add_font(f);
 }
 

From 83175da4b297af6c46954ded3b4cd4f476302104 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 10:59:57 -0600
Subject: [PATCH 05/57] ...

---
 src/calibre/ebooks/pdf/fonts.cpp | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/pdf/fonts.cpp b/src/calibre/ebooks/pdf/fonts.cpp
index 9b9e7708a3..c5261298ff 100644
--- a/src/calibre/ebooks/pdf/fonts.cpp
+++ b/src/calibre/ebooks/pdf/fonts.cpp
@@ -136,13 +136,9 @@ Fonts::size_type Fonts::add_font(XMLFont *f) {
 
 Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) {
     XMLFont *f = NULL;
-    if (font_name == NULL) {
-        string *fn = new string("Unknown");
-        f = new XMLFont(fn, size, rgb);
-        // fn must not be deleted
-    } else {
-        f = new XMLFont(font_name, size, rgb);
-    }
+    if (font_name == NULL) 
+        font_name = new string("Unknown");
+    f = new XMLFont(font_name, size, rgb);
 
     return this->add_font(f);
 }

From 3e1a43e86a50f06d7f71291825b3475db0d73de8 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 11:00:20 -0600
Subject: [PATCH 06/57] ...

---
 src/calibre/ebooks/pdf/fonts.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/ebooks/pdf/fonts.cpp b/src/calibre/ebooks/pdf/fonts.cpp
index c5261298ff..c3a709869e 100644
--- a/src/calibre/ebooks/pdf/fonts.cpp
+++ b/src/calibre/ebooks/pdf/fonts.cpp
@@ -138,6 +138,7 @@ Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) {
     XMLFont *f = NULL;
     if (font_name == NULL) 
         font_name = new string("Unknown");
+        // font_name must not be deleted
     f = new XMLFont(font_name, size, rgb);
 
     return this->add_font(f);

From d1859b0f784e972e0ff8af16e7b1afbb9f455c4d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 11:14:12 -0600
Subject: [PATCH 07/57] ...

---
 src/calibre/ebooks/metadata/sources/base.py   |   1 +
 .../ebooks/metadata/sources/identify.py       | 194 +++++++++---------
 2 files changed, 100 insertions(+), 95 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 08012c3ee8..d306a02bcb 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -21,6 +21,7 @@ msprefs = JSONConfig('metadata_sources.json')
 msprefs.defaults['txt_comments'] = False
 msprefs.defaults['ignore_fields'] = []
 msprefs.defaults['max_tags'] = 10
+msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
 
 def create_log(ostream=None):
     log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index ab86e8ffa2..87d34c0bff 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -21,9 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.date import utc_tz
 from calibre.utils.html2text import html2text
 
-# How long to wait for more results after first result is found
-WAIT_AFTER_FIRST_RESULT = 30 # seconds
-
+# Download worker {{{
 class Worker(Thread):
 
     def __init__(self, plugin, kwargs, abort):
@@ -47,99 +45,9 @@ def is_worker_alive(workers):
             return True
     return False
 
-def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
-    start_time = time.time()
-    plugins = list(metadata_plugins['identify'])
-
-    kwargs = {
-            'title': title,
-            'authors': authors,
-            'identifiers': identifiers,
-            'timeout': timeout,
-    }
-
-    log('Running identify query with parameters:')
-    log(kwargs)
-    log('Using plugins:', ', '.join([p.name for p in plugins]))
-    log('The log (if any) from individual plugins is below')
-
-    workers = [Worker(p, kwargs, abort) for p in plugins]
-    for w in workers:
-        w.start()
-
-    first_result_at = None
-    results = dict.fromkeys(plugins, [])
-
-    def get_results():
-        found = False
-        for w in workers:
-            try:
-                result = w.rq.get_nowait()
-            except Empty:
-                pass
-            else:
-                results[w.plugin].append(result)
-                found = True
-        return found
-
-    while True:
-        time.sleep(0.2)
-
-        if get_results() and first_result_at is None:
-            first_result_at = time.time()
-
-        if not is_worker_alive(workers):
-            break
-
-        if (first_result_at is not None and time.time() - first_result_at <
-                WAIT_AFTER_FIRST_RESULT):
-            log('Not waiting any longer for more results')
-            abort.set()
-            break
-
-    get_results()
-    sort_kwargs = dict(kwargs)
-    for k in list(sort_kwargs.iterkeys()):
-        if k not in ('title', 'authors', 'identifiers'):
-            sort_kwargs.pop(k)
-
-    for plugin, results in results.iteritems():
-        results.sort(key=plugin.identify_results_keygen(**sort_kwargs))
-        plog = plugin.buf.getvalue().strip()
-        if plog:
-            log('\n'+'*'*35, plugin.name, '*'*35)
-            log('Found %d results'%len(results))
-            log(plog)
-            log('\n'+'*'*80)
-
-        for i, result in enumerate(results):
-            result.relevance_in_source = i
-            result.has_cached_cover_url = \
-                plugin.get_cached_cover_url(result.identifiers) is not None
-            result.identify_plugin = plugin
-
-    log('The identify phase took %.2f seconds'%(time.time() - start_time))
-    log('Merging results from different sources and finding earliest',
-            'publication dates')
-    start_time = time.time()
-    results = merge_identify_results(results, log)
-    log('We have %d merged results, merging took: %.2f seconds' %
-            (len(results), time.time() - start_time))
-
-    if msprefs['txt_comments']:
-        for r in results:
-            if r.plugin.has_html_comments and r.comments:
-                r.comments = html2text(r.comments)
-
-    dummy = Metadata(_('Unknown'))
-    max_tags = msprefs['max_tags']
-    for f in msprefs['ignore_fields']:
-        for r in results:
-            setattr(r, f, getattr(dummy, f))
-            r.tags = r.tags[:max_tags]
-
-    return results
+# }}}
 
+# Merge results from different sources {{{
 
 class ISBNMerge(object):
 
@@ -298,6 +206,102 @@ def merge_identify_results(result_map, log):
 
     return isbn_merge.finalize()
 
+# }}}
+
+def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
+    start_time = time.time()
+    plugins = list(metadata_plugins['identify'])
+
+    kwargs = {
+            'title': title,
+            'authors': authors,
+            'identifiers': identifiers,
+            'timeout': timeout,
+    }
+
+    log('Running identify query with parameters:')
+    log(kwargs)
+    log('Using plugins:', ', '.join([p.name for p in plugins]))
+    log('The log (if any) from individual plugins is below')
+
+    workers = [Worker(p, kwargs, abort) for p in plugins]
+    for w in workers:
+        w.start()
+
+    first_result_at = None
+    results = dict.fromkeys(plugins, [])
+
+    def get_results():
+        found = False
+        for w in workers:
+            try:
+                result = w.rq.get_nowait()
+            except Empty:
+                pass
+            else:
+                results[w.plugin].append(result)
+                found = True
+        return found
+
+    wait_time = msprefs['wait_after_first_identify_result']
+    while True:
+        time.sleep(0.2)
+
+        if get_results() and first_result_at is None:
+            first_result_at = time.time()
+
+        if not is_worker_alive(workers):
+            break
+
+        if (first_result_at is not None and time.time() - first_result_at <
+                wait_time):
+            log('Not waiting any longer for more results')
+            abort.set()
+            break
+
+    get_results()
+    sort_kwargs = dict(kwargs)
+    for k in list(sort_kwargs.iterkeys()):
+        if k not in ('title', 'authors', 'identifiers'):
+            sort_kwargs.pop(k)
+
+    for plugin, results in results.iteritems():
+        results.sort(key=plugin.identify_results_keygen(**sort_kwargs))
+        plog = plugin.buf.getvalue().strip()
+        if plog:
+            log('\n'+'*'*35, plugin.name, '*'*35)
+            log('Found %d results'%len(results))
+            log(plog)
+            log('\n'+'*'*80)
+
+        for i, result in enumerate(results):
+            result.relevance_in_source = i
+            result.has_cached_cover_url = \
+                plugin.get_cached_cover_url(result.identifiers) is not None
+            result.identify_plugin = plugin
+
+    log('The identify phase took %.2f seconds'%(time.time() - start_time))
+    log('Merging results from different sources and finding earliest',
+            'publication dates')
+    start_time = time.time()
+    results = merge_identify_results(results, log)
+    log('We have %d merged results, merging took: %.2f seconds' %
+            (len(results), time.time() - start_time))
+
+    if msprefs['txt_comments']:
+        for r in results:
+            if r.plugin.has_html_comments and r.comments:
+                r.comments = html2text(r.comments)
+
+    dummy = Metadata(_('Unknown'))
+    max_tags = msprefs['max_tags']
+    for f in msprefs['ignore_fields']:
+        for r in results:
+            setattr(r, f, getattr(dummy, f))
+            r.tags = r.tags[:max_tags]
+
+    return results
+
 
 
 

From ac3693cfdc586b6c3f89bb5841d6fc881d3c6b7c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 11:36:26 -0600
Subject: [PATCH 08/57] Conversion pipeline: Handle inline <style> tags that
 put all the actual CSS inside an XML comment. Fixes #750063 (Private bug)

---
 src/calibre/ebooks/oeb/stylizer.py | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py
index 0cd17387fe..42974be355 100644
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@@ -17,6 +17,8 @@ from cssutils.css import CSSStyleRule, CSSPageRule, CSSStyleDeclaration, \
 from cssutils import profile as cssprofiles
 from lxml import etree
 from lxml.cssselect import css_to_xpath, ExpressionError, SelectorSyntaxError
+
+from calibre import force_unicode
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES
 from calibre.ebooks.oeb.base import XPNSMAP, xpath, urlnormalize
 from calibre.ebooks.oeb.profile import PROFILES
@@ -140,13 +142,22 @@ class Stylizer(object):
                 log=logging.getLogger('calibre.css'))
         self.font_face_rules = []
         for elem in head:
-            if elem.tag == XHTML('style') and elem.text \
-               and elem.get('type', CSS_MIME) in OEB_STYLES:
-                text = XHTML_CSS_NAMESPACE + elem.text
-                text = oeb.css_preprocessor(text)
-                stylesheet = parser.parseString(text, href=cssname)
-                stylesheet.namespaces['h'] = XHTML_NS
-                stylesheets.append(stylesheet)
+            if (elem.tag == XHTML('style') and
+                elem.get('type', CSS_MIME) in OEB_STYLES):
+                text = elem.text if elem.text else u''
+                for x in elem:  # child nodes, e.g. an XML comment wrapping the CSS
+                    t = getattr(x, 'text', None)
+                    if t:
+                        text += u'\n\n' + force_unicode(t, u'utf-8')
+                    t = getattr(x, 'tail', None)
+                    if t:
+                        text += u'\n\n' + force_unicode(t, u'utf-8')
+                if text:
+                    text = XHTML_CSS_NAMESPACE + text  # gathered text; elem.text may be None
+                    text = oeb.css_preprocessor(text)
+                    stylesheet = parser.parseString(text, href=cssname)
+                    stylesheet.namespaces['h'] = XHTML_NS
+                    stylesheets.append(stylesheet)
             elif elem.tag == XHTML('link') and elem.get('href') \
                  and elem.get('rel', 'stylesheet').lower() == 'stylesheet' \
                  and elem.get('type', CSS_MIME).lower() in OEB_STYLES:

From daa01500443eba9894c0d636365271299f343f79 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 12:06:54 -0600
Subject: [PATCH 09/57] When downloading metadata use the gzip transfer
 encoding when possible for a speedup. Fixes #749304 (metadata from google
 books not readable)

---
 src/calibre/ebooks/metadata/google_books.py   | 1 +
 src/calibre/ebooks/metadata/sources/amazon.py | 1 +
 src/calibre/ebooks/metadata/sources/base.py   | 8 ++++++++
 src/calibre/ebooks/metadata/sources/google.py | 1 +
 src/calibre/utils/browser.py                  | 6 +++---
 5 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/calibre/ebooks/metadata/google_books.py b/src/calibre/ebooks/metadata/google_books.py
index 5a5e09234e..2e52bf020d 100644
--- a/src/calibre/ebooks/metadata/google_books.py
+++ b/src/calibre/ebooks/metadata/google_books.py
@@ -193,6 +193,7 @@ class ResultList(list):
 def search(title=None, author=None, publisher=None, isbn=None,
            min_viewability='none', verbose=False, max_results=40):
     br   = browser()
+    br.set_handle_gzip(True)
     start, entries = 1, []
     while start > 0 and len(entries) <= max_results:
         new, start = Query(title=title, author=author, publisher=publisher,
diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 9334d818ec..15282ad896 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -283,6 +283,7 @@ class Amazon(Source):
     touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
         'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
     has_html_comments = True
+    supports_gzip_transfer_encoding = True
 
     AMAZON_DOMAINS = {
             'com': _('US'),
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index d306a02bcb..d3b564204f 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -111,6 +111,12 @@ class Source(Plugin):
     #: Set this to True if your plugin return HTML formatted comments
     has_html_comments = False
 
+    #: Setting this to True means that the browser object will add
+    #: Accept-Encoding: gzip to all requests. This can speedup downloads
+    #: but make sure that the source actually supports gzip transfer encoding
+    #: correctly first
+    supports_gzip_transfer_encoding = False
+
     def __init__(self, *args, **kwargs):
         Plugin.__init__(self, *args, **kwargs)
         self._isbn_to_identifier_cache = {}
@@ -134,6 +140,8 @@ class Source(Plugin):
     def browser(self):
         if self._browser is None:
             self._browser = browser(user_agent=random_user_agent())
+            if self.supports_gzip_transfer_encoding:
+                self._browser.set_handle_gzip(True)
         return self._browser.clone_browser()
 
     # }}}
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 989320f710..21c99fdf46 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -160,6 +160,7 @@ class GoogleBooks(Source):
     touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
         'comments', 'publisher', 'identifier:isbn', 'rating',
         'identifier:google']) # language currently disabled
+    supports_gzip_transfer_encoding = True
 
     GOOGLE_COVER = 'http://books.google.com/books?id=%s&printsec=frontcover&img=1'
 
diff --git a/src/calibre/utils/browser.py b/src/calibre/utils/browser.py
index 2f77ede6b3..f188d6b45a 100644
--- a/src/calibre/utils/browser.py
+++ b/src/calibre/utils/browser.py
@@ -38,10 +38,10 @@ class Browser(B):
         self._clone_actions['set_handle_equiv'] = ('set_handle_equiv',
                 args, kwargs)
 
-    def set_handle_gzip(self, *args, **kwargs):
-        B.set_handle_gzip(self, *args, **kwargs)
+    def set_handle_gzip(self, handle):
+        self._set_handler('_gzip', handle)
         self._clone_actions['set_handle_gzip'] = ('set_handle_gzip',
-                args, kwargs)
+                (handle,), {})
 
     def set_debug_redirect(self, *args, **kwargs):
         B.set_debug_redirect(self, *args, **kwargs)

From 33f84ba169eb233ac3fc0119d475896199a5c3cc Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 12:13:45 -0600
Subject: [PATCH 10/57] ...

---
 src/calibre/ebooks/metadata/sources/amazon.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 15282ad896..61b555b041 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -23,7 +23,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.library.comments import sanitize_comments_html
 from calibre.utils.date import parse_date
 
-class Worker(Thread): # {{{
+class Worker(Thread): # Get details {{{
 
     '''
     Get book details from amazons book page in a separate thread

From 6c4b405b0db9517e5a2d2a7a5f541e940ebb935e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 14:22:04 -0600
Subject: [PATCH 11/57] ...

---
 src/calibre/utils/browser.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/utils/browser.py b/src/calibre/utils/browser.py
index f188d6b45a..6f8703ab49 100644
--- a/src/calibre/utils/browser.py
+++ b/src/calibre/utils/browser.py
@@ -39,7 +39,7 @@ class Browser(B):
                 args, kwargs)
 
     def set_handle_gzip(self, handle):
-        self._set_handler('_gzip', handle)
+        B._set_handler(self, '_gzip', handle)
         self._clone_actions['set_handle_gzip'] = ('set_handle_gzip',
                 (handle,), {})
 

From 461c128bc287160d1431f058d752e585e54b8410 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 15:57:30 -0600
Subject: [PATCH 12/57] Fix book details popup becoming too tall if there is a
 lot of metadata

---
 src/calibre/gui2/dialogs/book_info.ui | 162 +++++++++++++++-----------
 1 file changed, 95 insertions(+), 67 deletions(-)

diff --git a/src/calibre/gui2/dialogs/book_info.ui b/src/calibre/gui2/dialogs/book_info.ui
index 412126a610..9e9e71eda0 100644
--- a/src/calibre/gui2/dialogs/book_info.ui
+++ b/src/calibre/gui2/dialogs/book_info.ui
@@ -7,15 +7,25 @@
     <x>0</x>
     <y>0</y>
     <width>917</width>
-    <height>480</height>
+    <height>492</height>
    </rect>
   </property>
   <property name="windowTitle">
    <string>Dialog</string>
   </property>
+  <property name="windowIcon">
+   <iconset resource="../../../../resources/images.qrc">
+    <normaloff>:/images/metadata.png</normaloff>:/images/metadata.png</iconset>
+  </property>
   <layout class="QGridLayout" name="gridLayout">
    <item row="0" column="0" colspan="2">
     <widget class="QLabel" name="title">
+     <property name="font">
+      <font>
+       <weight>75</weight>
+       <bold>true</bold>
+      </font>
+     </property>
      <property name="text">
       <string>TextLabel</string>
      </property>
@@ -24,86 +34,104 @@
      </property>
     </widget>
    </item>
-   <item row="1" column="0">
+   <item row="1" column="0" rowspan="3">
     <widget class="CoverView" name="cover"/>
    </item>
    <item row="1" column="1">
-    <layout class="QVBoxLayout" name="verticalLayout">
-     <item>
-      <widget class="QLabel" name="text">
-       <property name="text">
-        <string>TextLabel</string>
-       </property>
-       <property name="alignment">
-        <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop</set>
-       </property>
-       <property name="wordWrap">
-        <bool>true</bool>
-       </property>
-      </widget>
-     </item>
-     <item>
-      <widget class="QGroupBox" name="groupBox">
-       <property name="title">
-        <string>Comments</string>
-       </property>
-       <layout class="QVBoxLayout" name="verticalLayout_2">
-        <item>
-         <widget class="QWebView" name="comments">
-          <property name="sizePolicy">
-           <sizepolicy hsizetype="Preferred" vsizetype="Expanding">
-            <horstretch>0</horstretch>
-            <verstretch>0</verstretch>
-           </sizepolicy>
-          </property>
-          <property name="maximumSize">
-           <size>
-            <width>350</width>
-            <height>16777215</height>
-           </size>
-          </property>
-          <property name="url">
-           <url>
-            <string>about:blank</string>
-           </url>
-          </property>
-         </widget>
-        </item>
-       </layout>
-      </widget>
-     </item>
-     <item>
-      <widget class="QCheckBox" name="fit_cover">
-       <property name="text">
-        <string>Fit &amp;cover within view</string>
-       </property>
-      </widget>
-     </item>
-     <item>
-      <layout class="QHBoxLayout" name="horizontalLayout">
+    <widget class="QScrollArea" name="scrollArea">
+     <property name="frameShape">
+      <enum>QFrame::NoFrame</enum>
+     </property>
+     <property name="widgetResizable">
+      <bool>true</bool>
+     </property>
+     <widget class="QWidget" name="scrollAreaWidgetContents">
+      <property name="geometry">
+       <rect>
+        <x>0</x>
+        <y>0</y>
+        <width>435</width>
+        <height>670</height>
+       </rect>
+      </property>
+      <layout class="QVBoxLayout" name="verticalLayout">
        <item>
-        <widget class="QPushButton" name="previous_button">
+        <widget class="QLabel" name="text">
          <property name="text">
-          <string>&amp;Previous</string>
+          <string>TextLabel</string>
          </property>
-         <property name="icon">
-          <iconset resource="../../../../resources/images.qrc">
-           <normaloff>:/images/previous.png</normaloff>:/images/previous.png</iconset>
+         <property name="alignment">
+          <set>Qt::AlignLeading|Qt::AlignLeft|Qt::AlignTop</set>
+         </property>
+         <property name="wordWrap">
+          <bool>true</bool>
          </property>
         </widget>
        </item>
        <item>
-        <widget class="QPushButton" name="next_button">
-         <property name="text">
-          <string>&amp;Next</string>
-         </property>
-         <property name="icon">
-          <iconset resource="../../../../resources/images.qrc">
-           <normaloff>:/images/next.png</normaloff>:/images/next.png</iconset>
+        <widget class="QGroupBox" name="groupBox">
+         <property name="title">
+          <string>Comments</string>
          </property>
+         <layout class="QVBoxLayout" name="verticalLayout_2">
+          <item>
+           <widget class="QWebView" name="comments">
+            <property name="sizePolicy">
+             <sizepolicy hsizetype="Preferred" vsizetype="Expanding">
+              <horstretch>0</horstretch>
+              <verstretch>0</verstretch>
+             </sizepolicy>
+            </property>
+            <property name="maximumSize">
+             <size>
+              <width>350</width>
+              <height>16777215</height>
+             </size>
+            </property>
+            <property name="url">
+             <url>
+              <string>about:blank</string>
+             </url>
+            </property>
+           </widget>
+          </item>
+         </layout>
         </widget>
        </item>
       </layout>
+     </widget>
+    </widget>
+   </item>
+   <item row="2" column="1">
+    <widget class="QCheckBox" name="fit_cover">
+     <property name="text">
+      <string>Fit &amp;cover within view</string>
+     </property>
+    </widget>
+   </item>
+   <item row="3" column="1">
+    <layout class="QHBoxLayout" name="horizontalLayout">
+     <item>
+      <widget class="QPushButton" name="previous_button">
+       <property name="text">
+        <string>&amp;Previous</string>
+       </property>
+       <property name="icon">
+        <iconset resource="../../../../resources/images.qrc">
+         <normaloff>:/images/previous.png</normaloff>:/images/previous.png</iconset>
+       </property>
+      </widget>
+     </item>
+     <item>
+      <widget class="QPushButton" name="next_button">
+       <property name="text">
+        <string>&amp;Next</string>
+       </property>
+       <property name="icon">
+        <iconset resource="../../../../resources/images.qrc">
+         <normaloff>:/images/next.png</normaloff>:/images/next.png</iconset>
+       </property>
+      </widget>
      </item>
     </layout>
    </item>

From 70bcc90bca218dd7b211ff4b34e66c0285a875da Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 4 Apr 2011 18:46:05 -0400
Subject: [PATCH 13/57] HTMLZ Output: Fix bug with rewriting links.

---
 src/calibre/ebooks/htmlz/oeb2html.py | 37 ++++++++++++++--------------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/src/calibre/ebooks/htmlz/oeb2html.py b/src/calibre/ebooks/htmlz/oeb2html.py
index 9ee6f76449..827e57b932 100644
--- a/src/calibre/ebooks/htmlz/oeb2html.py
+++ b/src/calibre/ebooks/htmlz/oeb2html.py
@@ -62,24 +62,22 @@ class OEB2HTML(object):
             self.links[aid] = 'calibre_link-%s' % len(self.links.keys())
         return self.links[aid]
 
-    def rewrite_links(self, tag, attribs, page):
+    def rewrite_link(self, tag, attribs, page):
         # Rewrite ids.
         if 'id' in attribs:
             attribs['id'] = self.get_link_id(page.href, attribs['id'])
         # Rewrite links.
         if tag == 'a':
-            href = attribs['href']
-            href = page.abshref(href)
+            href = page.abshref(attribs['href'])
             if self.url_is_relative(href):
-                if '#' not in href:
-                    href += '#'
-                if href not in self.links:
-                    self.links[href] = 'calibre_link-%s' % len(self.links.keys())
-                href = '#%s' % self.links[href]
-            attribs['href'] = href
+                id = ''
+                if '#' in href:
+                    href, n, id = href.partition('#')
+                href = '#%s' % self.get_link_id(href, id)
+                attribs['href'] = href
         return attribs
 
-    def rewrite_images(self, tag, attribs, page):
+    def rewrite_image(self, tag, attribs, page):
         if tag == 'img':
             src = attribs.get('src', None)
             if src:
@@ -131,6 +129,10 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
         tags = []
         tag = barename(elem.tag)
         attribs = elem.attrib
+        
+        attribs = self.rewrite_link(tag, attribs, page)
+        attribs = self.rewrite_image(tag, attribs, page)
+        
         if tag == 'body':
             tag = 'div'
             attribs['id'] = self.get_link_id(page.href, '')
@@ -147,9 +149,6 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
         if 'style' in attribs:
             del attribs['style']
 
-        attribs = self.rewrite_links(tag, attribs, page)
-        attribs = self.rewrite_images(tag, attribs, page)
-
         # Turn the rest of the attributes into a string we can write with the tag.
         at = ''
         for k, v in attribs.items():
@@ -218,6 +217,9 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
         tags = []
         tag = barename(elem.tag)
         attribs = elem.attrib
+        
+        attribs = self.rewrite_link(tag, attribs, page)
+        attribs = self.rewrite_image(tag, attribs, page)
 
         style_a = '%s' % style
         if tag == 'body':
@@ -233,9 +235,6 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
         if 'style' in attribs:
             del attribs['style']
 
-        attribs = self.rewrite_links(tag, attribs, page)
-        attribs = self.rewrite_images(tag, attribs, page)
-
         # Turn the rest of the attributes into a string we can write with the tag.
         at = ''
         for k, v in attribs.items():
@@ -312,6 +311,9 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
         tag = barename(elem.tag)
         attribs = elem.attrib
 
+        attribs = self.rewrite_link(tag, attribs, page)
+        attribs = self.rewrite_image(tag, attribs, page)
+
         if tag == 'body':
             tag = 'div'
             attribs['id'] = self.get_link_id(page.href, '')
@@ -321,9 +323,6 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
         if 'style' in attribs:
             del attribs['style']
 
-        attribs = self.rewrite_links(tag, attribs, page)
-        attribs = self.rewrite_images(tag, attribs, page)
-
         # Turn the rest of the attributes into a string we can write with the tag.
         at = ''
         for k, v in attribs.items():

From c38f81ddf7168c444a99378a6a67c25e13e7cfe7 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 4 Apr 2011 18:46:46 -0400
Subject: [PATCH 14/57] Fix typos.

---
 src/calibre/ebooks/txt/markdownml.py | 2 +-
 src/calibre/ebooks/txt/output.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/txt/markdownml.py b/src/calibre/ebooks/txt/markdownml.py
index c179378049..fe76757eab 100644
--- a/src/calibre/ebooks/txt/markdownml.py
+++ b/src/calibre/ebooks/txt/markdownml.py
@@ -37,7 +37,7 @@ class MarkdownMLizer(object):
             if not self.opts.keep_links:
                 html = re.sub(r'<\s*/*\s*a[^>]*>', '', html)
             if not self.opts.keep_image_references:
-                html = re.sub(r'<\s*img[^>]*>', '', html)\
+                html = re.sub(r'<\s*img[^>]*>', '', html)
             
             text = html2text(html)
         
diff --git a/src/calibre/ebooks/txt/output.py b/src/calibre/ebooks/txt/output.py
index d021cbbba6..4e54a97b45 100644
--- a/src/calibre/ebooks/txt/output.py
+++ b/src/calibre/ebooks/txt/output.py
@@ -11,7 +11,7 @@ from lxml import etree
 
 from calibre.customize.conversion import OutputFormatPlugin, \
     OptionRecommendation
-from calibre.ebooks.oeb.base import OEB_IMAGES 
+from calibre.ebooks.oeb.base import OEB_IMAGES
 from calibre.ebooks.txt.txtml import TXTMLizer
 from calibre.ebooks.txt.newlines import TxtNewlines, specified_newlines
 from calibre.ptempfile import TemporaryDirectory, TemporaryFile

From 608cf75dc06723ed6cebb03b2e347e352c337f67 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 16:47:08 -0600
Subject: [PATCH 15/57] More work on metadata identify

---
 INSTALL                                       |  8 +-
 src/calibre/ebooks/metadata/sources/base.py   |  9 +++
 .../ebooks/metadata/sources/identify.py       | 45 +++++++++++
 src/calibre/ebooks/metadata/sources/test.py   | 78 +++++++++++++++++--
 4 files changed, 132 insertions(+), 8 deletions(-)

diff --git a/INSTALL b/INSTALL
index cb8261eff6..93b119b2e1 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,6 +1,9 @@
 calibre supports installation from source, only on Linux. 
-On Windows and OS X use the provided installers and use
-the facilities of the calibre-debug command to hack on the calibre source. 
+
+Note that you *do not* need to install from source to hack on
+the calibre source code. To get started with calibre development,
+use a normal calibre install and follow the instructions at
+http://calibre-ebook.com/user_manual/develop.html
 
 On Linux, there are two kinds of installation from source possible.
 Note that both kinds require lots of dependencies as well as a
@@ -45,3 +48,4 @@ This type of install can be run with the command::
     sudo python setup.py develop
 
 Use the -h flag for help on the develop command.
+
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index d3b564204f..5903a5e710 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -93,6 +93,15 @@ class InternalMetadataCompareKeyGen(object):
 
 # }}}
 
+def get_cached_cover_urls(mi):
+    from calibre.customize.ui import metadata_plugins
+    plugins = list(metadata_plugins['identify'])
+    for p in plugins:
+        url = p.get_cached_cover_url(mi.identifiers)
+        if url:
+            yield (p, url)
+
+
 class Source(Plugin):
 
     type = _('Metadata source')
diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index 87d34c0bff..71554595ad 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -302,6 +302,51 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
 
     return results
 
+if __name__ == '__main__': # tests {{{
+    # To run these test use: calibre-debug -e
+    # src/calibre/ebooks/metadata/sources/identify.py
+    from calibre.ebooks.metadata.sources.test import (test_identify,
+            title_test, authors_test)
+    test_identify(
+        [
 
+            ( # An e-book ISBN not on Amazon, one of the authors is
+              # unknown to Amazon
+                {'identifiers':{'isbn': '9780307459671'},
+                    'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
+                [title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
+                    exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
 
+            ),
+
+            (  # This isbn not on amazon
+                {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
+                    'authors':['Lutz']},
+                [title_test('Learning Python, 3rd Edition',
+                    exact=True), authors_test(['Mark Lutz'])
+                 ]
+
+            ),
+
+            ( # Sophisticated comment formatting
+                {'identifiers':{'isbn': '9781416580829'}},
+                [title_test('Angels & Demons - Movie Tie-In: A Novel',
+                    exact=True), authors_test(['Dan Brown'])]
+            ),
+
+            ( # No specific problems
+                {'identifiers':{'isbn': '0743273567'}},
+                [title_test('The great gatsby', exact=True),
+                    authors_test(['F. Scott Fitzgerald'])]
+            ),
+
+            (  # A newer book
+                {'identifiers':{'isbn': '9780316044981'}},
+                [title_test('The Heroes', exact=True),
+                    authors_test(['Joe Abercrombie'])]
+
+            ),
+
+        ])
+# }}}
 
diff --git a/src/calibre/ebooks/metadata/sources/test.py b/src/calibre/ebooks/metadata/sources/test.py
index de95a9b887..a7dcc2fa14 100644
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@@ -14,7 +14,8 @@ from threading import Event
 from calibre.customize.ui import metadata_plugins
 from calibre import prints, sanitize_file_name2
 from calibre.ebooks.metadata import check_isbn
-from calibre.ebooks.metadata.sources.base import create_log
+from calibre.ebooks.metadata.sources.base import (create_log,
+        get_cached_cover_urls)
 
 def isbn_test(isbn):
     isbn_ = check_isbn(isbn)
@@ -45,8 +46,75 @@ def authors_test(authors):
 
     return test
 
+def init_test(tdir_name):
+    tdir = tempfile.gettempdir()
+    lf = os.path.join(tdir, tdir_name.replace(' ', '')+'_identify_test.txt')
+    log = create_log(open(lf, 'wb'))
+    abort = Event()
+    return tdir, lf, log, abort
 
-def test_identify_plugin(name, tests):
+def test_identify(tests): # {{{
+    '''
+    :param tests: List of 2-tuples. Each two tuple is of the form (args,
+                  test_funcs). args is a dict of keyword arguments to pass to
+                  the identify method. test_funcs are callables that accept a
+                  Metadata object and return True iff the object passes the
+                  test.
+    '''
+    from calibre.ebooks.metadata.sources.identify import identify
+
+    tdir, lf, log, abort = init_test('Full Identify')
+
+    times = []
+
+    for kwargs, test_funcs in tests:
+        prints('Running test with:', kwargs)
+        args = (log, abort)
+        start_time = time.time()
+        results = identify(*args, **kwargs)
+        total_time = time.time() - start_time
+        times.append(total_time)
+        if not results:
+            prints('identify failed to find any results')
+            break
+
+        prints('Found', len(results), 'matches:', end=' ')
+        prints('Smaller relevance means better match')
+
+        for i, mi in enumerate(results):
+            prints('*'*30, 'Relevance:', i, '*'*30)
+            prints(mi)
+            prints('\nCached cover URLs    :',
+                    [x[0].name for x in get_cached_cover_urls(mi)])
+            prints('*'*75, '\n\n')
+
+        possibles = []
+        for mi in results:
+            test_failed = False
+            for tfunc in test_funcs:
+                if not tfunc(mi):
+                    test_failed = True
+                    break
+            if not test_failed:
+                possibles.append(mi)
+
+        if not possibles:
+            prints('ERROR: No results that passed all tests were found')
+            prints('Log saved to', lf)
+            raise SystemExit(1)
+
+        if results[0] is not possibles[0]:
+            prints('Most relevant result failed the tests')
+            raise SystemExit(1)
+
+    prints('Average time per query', sum(times)/len(times))
+
+    if os.stat(lf).st_size > 10:
+        prints('There were some errors/warnings, see log', lf)
+
+# }}}
+
+def test_identify_plugin(name, tests): # {{{
     '''
     :param name: Plugin name
     :param tests: List of 2-tuples. Each two tuple is of the form (args,
@@ -62,10 +130,7 @@ def test_identify_plugin(name, tests):
             break
     prints('Testing the identify function of', plugin.name)
 
-    tdir = tempfile.gettempdir()
-    lf = os.path.join(tdir, plugin.name.replace(' ', '')+'_identify_test.txt')
-    log = create_log(open(lf, 'wb'))
-    abort = Event()
+    tdir, lf, log, abort = init_test(plugin.name)
     prints('Log saved to', lf)
 
     times = []
@@ -159,4 +224,5 @@ def test_identify_plugin(name, tests):
 
     if os.stat(lf).st_size > 10:
         prints('There were some errors/warnings, see log', lf)
+# }}}
 

From bcd06ca5799c5116d15d30cdfee71233405044ec Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 16:48:05 -0600
Subject: [PATCH 16/57] ...

---
 README | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README b/README
index 2c916fc7d7..b518e977c8 100644
--- a/README
+++ b/README
@@ -7,7 +7,7 @@ reading. It is cross platform, running on Linux, Windows and OS X.
 For screenshots: https://calibre-ebook.com/demo
 
 For installation/usage instructions please see
-http://calibre-ebook.com
+http://calibre-ebook.com/user_manual
 
 For source code access:
 bzr branch lp:calibre

From f89d0efa1f216018aeab84be2be53ab15012e41a Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Mon, 4 Apr 2011 19:47:59 -0400
Subject: [PATCH 17/57] HTMLZ Output: Use urldefrag instead of doing it
 ourselves.

---
 src/calibre/ebooks/htmlz/oeb2html.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/htmlz/oeb2html.py b/src/calibre/ebooks/htmlz/oeb2html.py
index 827e57b932..af5867356a 100644
--- a/src/calibre/ebooks/htmlz/oeb2html.py
+++ b/src/calibre/ebooks/htmlz/oeb2html.py
@@ -12,7 +12,7 @@ Transform OEB content into a single (more or less) HTML file.
 
 import os
 
-from urlparse import urlparse
+from urlparse import urlparse, urldefrag
 
 from calibre import prepare_string_for_xml
 from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
@@ -70,9 +70,7 @@ class OEB2HTML(object):
         if tag == 'a':
             href = page.abshref(attribs['href'])
             if self.url_is_relative(href):
-                id = ''
-                if '#' in href:
-                    href, n, id = href.partition('#')
+                href, id = urldefrag(href)
                 href = '#%s' % self.get_link_id(href, id)
                 attribs['href'] = href
         return attribs

From f5c1453f43f7b3a4a7149f9c661f2e694e9a2864 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 4 Apr 2011 21:39:03 -0600
Subject: [PATCH 18/57] Fix #750932 (Updated recipe for Perfil)

---
 recipes/perfil.recipe | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/recipes/perfil.recipe b/recipes/perfil.recipe
index 7db86f9d4a..1104202318 100644
--- a/recipes/perfil.recipe
+++ b/recipes/perfil.recipe
@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 perfil.com
 '''
@@ -39,9 +39,9 @@ class Perfil(BasicNewsRecipe):
                       dict(name=['iframe','embed','object','base','meta','link'])
                      ,dict(name='a', attrs={'href':'#comentarios'})
                      ,dict(name='div', attrs={'class':'foto3'})
-                     ,dict(name='img', attrs={'alt':'ampliar'})
+                     ,dict(name='img', attrs={'alt':['ampliar','Ampliar']})
                     ]
-    keep_only_tags=[dict(attrs={'class':['bd468a','cuerpoSuperior']})]
+    keep_only_tags=[dict(attrs={'class':['articulo','cuerpoSuperior']})]
     remove_attributes=['onload','lang','width','height','border']
 
     feeds = [

From 0d924d81efad3e5fc6d9f178cc5648099a7f016c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 11:33:29 -0600
Subject: [PATCH 19/57] Replace the Arora and Camino user agents with Firefox 4
 user agents as Amazon was serving different content for those UAs

---
 src/calibre/__init__.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index c3aca457ad..1799072045 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -281,16 +281,17 @@ def get_parsed_proxy(typ='http', debug=True):
 
 def random_user_agent():
     choices = [
-        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/523.15 (KHTML, like Gecko, Safari/419.3) Arora/0.3 (Change: 287 c9dfb30)',
+        'Mozilla/5.0 (Windows NT 5.2; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
+        'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
+        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
         'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
         'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.2.153.1 Safari/525.19',
         'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.11) Gecko/20101012 Firefox/3.6.11',
-        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en; rv:1.8.1.14) Gecko/20080409 Camino/1.6 (like Firefox/2.0.0.14)',
-        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.0.1) Gecko/20060118 Camino/1.0b2+',
         'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.472.63 Safari/534.3',
         'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko) Chrome/4.0.249.78 Safari/532.5',
         'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
     ]
+    #return choices[-1]
     return choices[random.randint(0, len(choices)-1)]
 
 

From 6e98d78dd753e13e08921215d1d1caccffa80f67 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 11:35:53 -0600
Subject: [PATCH 20/57] Amazon plugin: Work around broken encoding. Detect and
 use mobile user agent search results page

---
 src/calibre/ebooks/metadata/sources/amazon.py | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 61b555b041..d1c8f24da6 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -64,7 +64,7 @@ class Worker(Thread): # Get details {{{
 
         raw = xml_to_unicode(raw, strip_encoding_pats=True,
                 resolve_entities=True)[0]
-        # open('/t/t.html', 'wb').write(raw)
+        #open('/t/t.html', 'wb').write(raw)
 
         if '<title>404 - ' in raw:
             self.log.error('URL malformed: %r'%self.url)
@@ -218,6 +218,9 @@ class Worker(Thread): # Get details {{{
                     ' @class="emptyClear" or @href]'):
                 c.getparent().remove(c)
             desc = tostring(desc, method='html', encoding=unicode).strip()
+            # Encoding bug in Amazon data U+fffd (replacement char)
+            # in some examples it is present in place of '
+            desc = desc.replace('\ufffd', "'")
             # remove all attributes from tags
             desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
             # Collapse whitespace
@@ -410,6 +413,18 @@ class Amazon(Source):
                     if 'bulk pack' not in title:
                         matches.append(a.get('href'))
                     break
+            if not matches:
+                # This can happen for some user agents that Amazon thinks are
+                # mobile/less capable
+                log('Trying alternate results page markup')
+                for td in root.xpath(
+                    r'//div[@id="Results"]/descendant::td[starts-with(@id, "search:Td:")]'):
+                    for a in td.xpath(r'descendant::td[@class="dataColumn"]/descendant::a[@href]/span[@class="srTitle"]/..'):
+                        title = tostring(a, method='text', encoding=unicode).lower()
+                        if 'bulk pack' not in title:
+                            matches.append(a.get('href'))
+                        break
+
 
         # Keep only the top 5 matches as the matches are sorted by relevance by
         # Amazon so lower matches are not likely to be very relevant

From 8dd435ecdbf4f5b5cbec211eff71cea5d3eeb3f2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 11:36:55 -0600
Subject: [PATCH 21/57] Test and debug the new identify() function

---
 src/calibre/ebooks/metadata/sources/base.py   |  4 +-
 .../ebooks/metadata/sources/identify.py       | 62 ++++++++++---------
 src/calibre/ebooks/metadata/sources/test.py   |  2 +
 src/calibre/ebooks/metadata/xisbn.py          |  6 +-
 4 files changed, 43 insertions(+), 31 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 5903a5e710..86468141e1 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -20,7 +20,7 @@ from calibre.ebooks.metadata import check_isbn
 msprefs = JSONConfig('metadata_sources.json')
 msprefs.defaults['txt_comments'] = False
 msprefs.defaults['ignore_fields'] = []
-msprefs.defaults['max_tags'] = 10
+msprefs.defaults['max_tags'] = 20
 msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
 
 def create_log(ostream=None):
@@ -95,7 +95,7 @@ class InternalMetadataCompareKeyGen(object):
 
 def get_cached_cover_urls(mi):
     from calibre.customize.ui import metadata_plugins
-    plugins = list(metadata_plugins['identify'])
+    plugins = list(metadata_plugins(['identify']))
     for p in plugins:
         url = p.get_cached_cover_url(mi.identifiers)
         if url:
diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index 71554595ad..b65e97a10d 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -57,13 +57,13 @@ class ISBNMerge(object):
 
     def isbn_in_pool(self, isbn):
         if isbn:
-            for p in self.pools:
-                if isbn in p:
-                    return p
+            for isbns, pool in self.pools.iteritems():
+                if isbn in isbns:
+                    return pool
         return None
 
     def pool_has_result_from_same_source(self, pool, result):
-        results = self.pools[pool][1]
+        results = pool[1]
         for r in results:
             if r.identify_plugin is result.identify_plugin:
                 return True
@@ -77,7 +77,7 @@ class ISBNMerge(object):
                 isbns, min_year = xisbn.get_isbn_pool(isbn)
                 if not isbns:
                     isbns = frozenset([isbn])
-                self.pool[isbns] = pool = (min_year, [])
+                self.pools[isbns] = pool = (min_year, [])
 
             if not self.pool_has_result_from_same_source(pool, result):
                 pool[1].append(result)
@@ -102,7 +102,7 @@ class ISBNMerge(object):
 
     def merge_isbn_results(self):
         self.results = []
-        for min_year, results in self.pool.itervalues():
+        for min_year, results in self.pools.itervalues():
             if results:
                 self.results.append(self.merge(results, min_year))
 
@@ -169,11 +169,11 @@ class ISBNMerge(object):
             min_date = datetime(min_year, 1, 2, tzinfo=utc_tz)
             ans.pubdate = min_date
         else:
-            min_date = datetime(10000, 1, 1, tzinfo=utc_tz)
+            min_date = datetime(3001, 1, 1, tzinfo=utc_tz)
             for r in results:
                 if r.pubdate is not None and r.pubdate < min_date:
                     min_date = r.pubdate
-            if min_date.year < 10000:
+            if min_date.year < 3000:
                 ans.pubdate = min_date
 
         # Identifiers
@@ -183,7 +183,7 @@ class ISBNMerge(object):
         # Merge any other fields with no special handling (random merge)
         touched_fields = set()
         for r in results:
-            touched_fields |= r.plugin.touched_fields
+            touched_fields |= r.identify_plugin.touched_fields
 
         for f in touched_fields:
             if f.startswith('identifier:') or not ans.is_null(f):
@@ -210,7 +210,7 @@ def merge_identify_results(result_map, log):
 
 def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
     start_time = time.time()
-    plugins = list(metadata_plugins['identify'])
+    plugins = list(metadata_plugins(['identify']))
 
     kwargs = {
             'title': title,
@@ -229,7 +229,10 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
         w.start()
 
     first_result_at = None
-    results = dict.fromkeys(plugins, [])
+    results = {}
+    for p in plugins:
+        results[p] = []
+    logs = dict([(w.plugin, w.buf) for w in workers])
 
     def get_results():
         found = False
@@ -253,28 +256,31 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
         if not is_worker_alive(workers):
             break
 
-        if (first_result_at is not None and time.time() - first_result_at <
+        if (first_result_at is not None and time.time() - first_result_at >
                 wait_time):
             log('Not waiting any longer for more results')
             abort.set()
             break
 
-    get_results()
+    while not abort.is_set() and get_results():
+        pass
+
     sort_kwargs = dict(kwargs)
     for k in list(sort_kwargs.iterkeys()):
         if k not in ('title', 'authors', 'identifiers'):
             sort_kwargs.pop(k)
 
-    for plugin, results in results.iteritems():
-        results.sort(key=plugin.identify_results_keygen(**sort_kwargs))
-        plog = plugin.buf.getvalue().strip()
+    for plugin, presults in results.iteritems():
+        presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
+        plog = logs[plugin].getvalue().strip()
+        log('\n'+'*'*35, plugin.name, '*'*35)
+        log('Request extra headers:', plugin.browser.addheaders)
+        log('Found %d results'%len(presults))
         if plog:
-            log('\n'+'*'*35, plugin.name, '*'*35)
-            log('Found %d results'%len(results))
             log(plog)
-            log('\n'+'*'*80)
+        log('\n'+'*'*80)
 
-        for i, result in enumerate(results):
+        for i, result in enumerate(presults):
             result.relevance_in_source = i
             result.has_cached_cover_url = \
                 plugin.get_cached_cover_url(result.identifiers) is not None
@@ -295,10 +301,10 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
 
     dummy = Metadata(_('Unknown'))
     max_tags = msprefs['max_tags']
-    for f in msprefs['ignore_fields']:
-        for r in results:
+    for r in results:
+        for f in msprefs['ignore_fields']:
             setattr(r, f, getattr(dummy, f))
-            r.tags = r.tags[:max_tags]
+        r.tags = r.tags[:max_tags]
 
     return results
 
@@ -307,8 +313,7 @@ if __name__ == '__main__': # tests {{{
     # src/calibre/ebooks/metadata/sources/identify.py
     from calibre.ebooks.metadata.sources.test import (test_identify,
             title_test, authors_test)
-    test_identify(
-        [
+    tests = [
 
             ( # An e-book ISBN not on Amazon, one of the authors is
               # unknown to Amazon
@@ -330,14 +335,14 @@ if __name__ == '__main__': # tests {{{
 
             ( # Sophisticated comment formatting
                 {'identifiers':{'isbn': '9781416580829'}},
-                [title_test('Angels & Demons - Movie Tie-In: A Novel',
+                [title_test('Angels & Demons',
                     exact=True), authors_test(['Dan Brown'])]
             ),
 
             ( # No specific problems
                 {'identifiers':{'isbn': '0743273567'}},
                 [title_test('The great gatsby', exact=True),
-                    authors_test(['F. Scott Fitzgerald'])]
+                    authors_test(['Francis Scott Fitzgerald'])]
             ),
 
             (  # A newer book
@@ -347,6 +352,7 @@ if __name__ == '__main__': # tests {{{
 
             ),
 
-        ])
+        ]
+    test_identify(tests[4:5])
 # }}}
 
diff --git a/src/calibre/ebooks/metadata/sources/test.py b/src/calibre/ebooks/metadata/sources/test.py
index a7dcc2fa14..428da3ef65 100644
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@@ -64,6 +64,7 @@ def test_identify(tests): # {{{
     from calibre.ebooks.metadata.sources.identify import identify
 
     tdir, lf, log, abort = init_test('Full Identify')
+    prints('Log saved to', lf)
 
     times = []
 
@@ -129,6 +130,7 @@ def test_identify_plugin(name, tests): # {{{
             plugin = x
             break
     prints('Testing the identify function of', plugin.name)
+    prints('Using extra headers:', plugin.browser.addheaders)
 
     tdir, lf, log, abort = init_test(plugin.name)
     prints('Log saved to', lf)
diff --git a/src/calibre/ebooks/metadata/xisbn.py b/src/calibre/ebooks/metadata/xisbn.py
index 69cc3f7cb3..56156c034e 100644
--- a/src/calibre/ebooks/metadata/xisbn.py
+++ b/src/calibre/ebooks/metadata/xisbn.py
@@ -73,7 +73,11 @@ class xISBN(object):
 
     def get_isbn_pool(self, isbn):
         data = self.get_data(isbn)
-        isbns = frozenset([x.get('isbn') for x in data if 'isbn' in x])
+        raw = tuple(x.get('isbn') for x in data if 'isbn' in x)
+        isbns = []
+        for x in raw:
+            isbns += x
+        isbns = frozenset(isbns)
         min_year = 100000
         for x in data:
             try:

From 42856543527e198e109de8f713cdf2586f06f906 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 11:52:52 -0600
Subject: [PATCH 22/57] identify(): default identifiers to a dict and test lookups without identifiers

---
 src/calibre/ebooks/metadata/sources/identify.py | 15 ++++++++-------
 src/calibre/ebooks/metadata/sources/test.py     |  8 ++++++--
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index b65e97a10d..322a61bd83 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -208,7 +208,7 @@ def merge_identify_results(result_map, log):
 
 # }}}
 
-def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
+def identify(log, abort, title=None, authors=None, identifiers={}, timeout=30):
     start_time = time.time()
     plugins = list(metadata_plugins(['identify']))
 
@@ -222,7 +222,7 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
     log('Running identify query with parameters:')
     log(kwargs)
     log('Using plugins:', ', '.join([p.name for p in plugins]))
-    log('The log (if any) from individual plugins is below')
+    log('The log from individual plugins is below')
 
     workers = [Worker(p, kwargs, abort) for p in plugins]
     for w in workers:
@@ -273,7 +273,7 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30):
     for plugin, presults in results.iteritems():
         presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
         plog = logs[plugin].getvalue().strip()
-        log('\n'+'*'*35, plugin.name, '*'*35)
+        log('\n'+'*'*30, plugin.name, '*'*30)
         log('Request extra headers:', plugin.browser.addheaders)
         log('Found %d results'%len(presults))
         if plog:
@@ -324,10 +324,10 @@ if __name__ == '__main__': # tests {{{
 
             ),
 
-            (  # This isbn not on amazon
-                {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
+            (  # Test absence of identifiers
+                {'title':'Learning Python',
                     'authors':['Lutz']},
-                [title_test('Learning Python, 3rd Edition',
+                [title_test('Learning Python',
                     exact=True), authors_test(['Mark Lutz'])
                  ]
 
@@ -353,6 +353,7 @@ if __name__ == '__main__': # tests {{{
             ),
 
         ]
-    test_identify(tests[4:5])
+    #test_identify(tests[1:2])
+    test_identify(tests)
 # }}}
 
diff --git a/src/calibre/ebooks/metadata/sources/test.py b/src/calibre/ebooks/metadata/sources/test.py
index 428da3ef65..2e72f86c47 100644
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@@ -69,6 +69,9 @@ def test_identify(tests): # {{{
     times = []
 
     for kwargs, test_funcs in tests:
+        log('#'*80)
+        log('### Running test with:', kwargs)
+        log('#'*80)
         prints('Running test with:', kwargs)
         args = (log, abort)
         start_time = time.time()
@@ -108,10 +111,11 @@ def test_identify(tests): # {{{
             prints('Most relevant result failed the tests')
             raise SystemExit(1)
 
+        log('\n\n')
+
     prints('Average time per query', sum(times)/len(times))
 
-    if os.stat(lf).st_size > 10:
-        prints('There were some errors/warnings, see log', lf)
+    prints('Full log is at:', lf)
 
 # }}}
 

From 547454c705b889c61bfd3a4b2998395b1aa38d15 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 12:09:27 -0600
Subject: [PATCH 23/57] Make the full identify() log a lot more useful

---
 src/calibre/ebooks/metadata/sources/identify.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index 322a61bd83..17adc6ffc6 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -34,10 +34,12 @@ class Worker(Thread):
         self.log = create_log(self.buf)
 
     def run(self):
+        start = time.time()
         try:
             self.plugin.identify(self.log, self.rq, self.abort, **self.kwargs)
         except:
             self.log.exception('Plugin', self.plugin.name, 'failed')
+        self.plugin.dl_time_spent = time.time() - start
 
 def is_worker_alive(workers):
     for w in workers:
@@ -276,6 +278,14 @@ def identify(log, abort, title=None, authors=None, identifiers={}, timeout=30):
         log('\n'+'*'*30, plugin.name, '*'*30)
         log('Request extra headers:', plugin.browser.addheaders)
         log('Found %d results'%len(presults))
+        time_spent = getattr(plugin, 'dl_time_spent', None)
+        if time_spent is None:
+            log('Downloading was aborted')
+        else:
+            log('Downloading from', plugin.name, 'took', time_spent)
+        for r in presults:
+            log('\n\n---')
+            log(unicode(r))
         if plog:
             log(plog)
         log('\n'+'*'*80)

From d9cea95a71913a256f2c1bc17ef58db9d477b8dd Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 12:36:12 -0600
Subject: [PATCH 24/57] Start migrating the isbndb plugin

---
 src/calibre/ebooks/metadata/sources/base.py   |  2 +-
 .../ebooks/metadata/sources/identify.py       |  5 +++
 src/calibre/ebooks/metadata/sources/isbndb.py | 40 +++++++++++++++++++
 3 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 src/calibre/ebooks/metadata/sources/isbndb.py

diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 86468141e1..30b804a76e 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -17,7 +17,7 @@ from calibre.utils.config import JSONConfig
 from calibre.utils.titlecase import titlecase
 from calibre.ebooks.metadata import check_isbn
 
-msprefs = JSONConfig('metadata_sources.json')
+msprefs = JSONConfig('metadata_sources/global.json')
 msprefs.defaults['txt_comments'] = False
 msprefs.defaults['ignore_fields'] = []
 msprefs.defaults['max_tags'] = 20
diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index 17adc6ffc6..b04a697ed8 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -272,6 +272,7 @@ def identify(log, abort, title=None, authors=None, identifiers={}, timeout=30):
         if k not in ('title', 'authors', 'identifiers'):
             sort_kwargs.pop(k)
 
+    longest, lp = -1, ''
     for plugin, presults in results.iteritems():
         presults.sort(key=plugin.identify_results_keygen(**sort_kwargs))
         plog = logs[plugin].getvalue().strip()
@@ -281,8 +282,11 @@ def identify(log, abort, title=None, authors=None, identifiers={}, timeout=30):
         time_spent = getattr(plugin, 'dl_time_spent', None)
         if time_spent is None:
             log('Downloading was aborted')
+            longest, lp = -1, plugin.name
         else:
             log('Downloading from', plugin.name, 'took', time_spent)
+            if time_spent > longest:
+                longest, lp = time_spent, plugin.name
         for r in presults:
             log('\n\n---')
             log(unicode(r))
@@ -297,6 +301,7 @@ def identify(log, abort, title=None, authors=None, identifiers={}, timeout=30):
             result.identify_plugin = plugin
 
     log('The identify phase took %.2f seconds'%(time.time() - start_time))
+    log('The longest time (%f) was taken by:'%longest, lp)
     log('Merging results from different sources and finding earliest',
             'publication dates')
     start_time = time.time()
diff --git a/src/calibre/ebooks/metadata/sources/isbndb.py b/src/calibre/ebooks/metadata/sources/isbndb.py
new file mode 100644
index 0000000000..3cd9d96c81
--- /dev/null
+++ b/src/calibre/ebooks/metadata/sources/isbndb.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from calibre.ebooks.metadata.sources.base import Source
+
+class ISBNDB(Source):
+    ''' Metadata source plugin for isbndb.com (identify only) '''
+
+    name = 'ISBNDB'
+    description = _('Downloads metadata from isbndb.com')
+
+    capabilities = frozenset(['identify'])
+    touched_fields = frozenset(['title', 'authors',
+        'identifier:isbn', 'comments', 'publisher'])
+    supports_gzip_transfer_encoding = True
+
+    def __init__(self, *args, **kwargs):
+        Source.__init__(self, *args, **kwargs)
+
+        prefs = self.prefs
+        prefs.defaults['key_migrated'] = False
+        prefs.defaults['isbndb_key'] = None
+
+        # Copy the key from the 'IsbnDB' plugin customization, attempted once
+        if not prefs['key_migrated']:
+            prefs['key_migrated'] = True
+            try:
+                from calibre.customize.ui import config
+                key = config['plugin_customization']['IsbnDB']
+                prefs['isbndb_key'] = key
+            except Exception: # best effort, but let Ctrl-C/SystemExit through
+                pass
+
+        self.isbndb_key = prefs['isbndb_key']

From 75f61b44a57c3fe80ab2339f5b5856ced9e74582 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 15:12:18 -0600
Subject: [PATCH 25/57] MOBI Output: Don't use self-closing tags

---
 src/calibre/ebooks/mobi/writer.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py
index fccaad8811..5f4c47cdf3 100644
--- a/src/calibre/ebooks/mobi/writer.py
+++ b/src/calibre/ebooks/mobi/writer.py
@@ -282,8 +282,8 @@ class Serializer(object):
                 buffer.write('="')
                 self.serialize_text(val, quot=True)
                 buffer.write('"')
+        buffer.write('>')
         if elem.text or len(elem) > 0:
-            buffer.write('>')
             if elem.text:
                 self.anchor_offset = None
                 self.serialize_text(elem.text)
@@ -292,9 +292,7 @@ class Serializer(object):
                 if child.tail:
                     self.anchor_offset = None
                     self.serialize_text(child.tail)
-            buffer.write('</%s>' % tag)
-        else:
-            buffer.write('/>')
+        buffer.write('</%s>' % tag)
 
     def serialize_text(self, text, quot=False):
         text = text.replace('&', '&amp;')

From 23251c969db2cef7691fc87b4e6e3db6a5f9d8fd Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 16:08:05 -0600
Subject: [PATCH 26/57] Fixes to the new metadata dialog

---
 src/calibre/gui2/metadata/basic_widgets.py | 30 ++++++++++++++++++----
 src/calibre/gui2/metadata/single.py        | 21 +++++++++++----
 src/calibre/gui2/widgets.py                |  1 +
 3 files changed, 42 insertions(+), 10 deletions(-)

diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py
index 635a037482..995fa082a7 100644
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
 
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@@ -7,10 +9,10 @@ __docformat__ = 'restructuredtext en'
 
 import textwrap, re, os
 
-from PyQt4.Qt import Qt, QDateEdit, QDate, \
-    QIcon, QToolButton, QWidget, QLabel, QGridLayout, \
-    QDoubleSpinBox, QListWidgetItem, QSize, QPixmap, \
-    QPushButton, QSpinBox, QLineEdit
+from PyQt4.Qt import (Qt, QDateEdit, QDate,
+    QIcon, QToolButton, QWidget, QLabel, QGridLayout,
+    QDoubleSpinBox, QListWidgetItem, QSize, QPixmap,
+    QPushButton, QSpinBox, QLineEdit, QSizePolicy)
 
 from calibre.gui2.widgets import EnLineEdit, FormatList, ImageView
 from calibre.gui2.complete import MultiCompleteLineEdit, MultiCompleteComboBox
@@ -22,7 +24,7 @@ from calibre.ebooks.metadata.meta import get_metadata
 from calibre.gui2 import file_icon_provider, UNDEFINED_QDATE, UNDEFINED_DATE, \
         choose_files, error_dialog, choose_images, question_dialog
 from calibre.utils.date import local_tz, qt_to_dt
-from calibre import strftime
+from calibre import strftime, fit_image
 from calibre.ebooks import BOOK_EXTENSIONS
 from calibre.customize.ui import run_plugins_on_import
 from calibre.utils.date import utcfromtimestamp
@@ -480,6 +482,7 @@ class FormatsManager(QWidget): # {{{
 
     def initialize(self, db, id_):
         self.changed = False
+        self.formats.clear()
         exts = db.formats(id_, index_is_id=True)
         self.original_val = set([])
         if exts:
@@ -638,6 +641,23 @@ class Cover(ImageView): # {{{
                 self.trim_cover_button, self.download_cover_button,
                 self.generate_cover_button]
 
+        self.frame_size = (300, 400)
+        self.setSizePolicy(QSizePolicy(QSizePolicy.Preferred,
+            QSizePolicy.Preferred))
+
+    def frame_resized(self, ev):
+        sz = ev.size() # size carried by the resize event
+        self.frame_size = (sz.width()//3, sz.height()) # 1/3 width, full height
+
+    def sizeHint(self):
+        ''' Base size hint, resized by fit_image() to frame_size if needed '''
+        hint = ImageView.sizeHint(self)
+        max_w, max_h = self.frame_size
+        scaled, nw, nh = fit_image(hint.width(), hint.height(), max_w, max_h)
+        if scaled:
+            return QSize(nw, nh)
+        return hint
+
     def select_cover(self, *args):
         files = choose_images(self, 'change cover dialog',
                              _('Choose cover for ') +
diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py
index 5b17b454e7..e20c519aa8 100644
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@@ -1,5 +1,7 @@
 #!/usr/bin/env python
 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
 
 __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
@@ -8,10 +10,10 @@ __docformat__ = 'restructuredtext en'
 import os
 from functools import partial
 
-from PyQt4.Qt import Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton, \
-        QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont, \
-        QTabWidget, QIcon, QToolButton, QSplitter, QGroupBox, QSpacerItem, \
-        QSizePolicy, QPalette, QFrame, QSize, QKeySequence
+from PyQt4.Qt import (Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton,
+        QGridLayout, pyqtSignal, QDialogButtonBox, QScrollArea, QFont,
+        QTabWidget, QIcon, QToolButton, QSplitter, QGroupBox, QSpacerItem,
+        QSizePolicy, QPalette, QFrame, QSize, QKeySequence)
 
 from calibre.ebooks.metadata import authors_to_string, string_to_authors
 from calibre.gui2 import ResizableDialog, error_dialog, gprefs
@@ -385,6 +387,14 @@ class MetadataSingleDialogBase(ResizableDialog):
                 disconnect(x.clicked)
     # }}}
 
+class Splitter(QSplitter):
+    ''' QSplitter that emits frame_resized(event) on every resizeEvent '''
+    frame_resized = pyqtSignal(object)
+
+    def resizeEvent(self, ev):
+        self.frame_resized.emit(ev)
+        return QSplitter.resizeEvent(self, ev)
+
 class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
 
     def do_layout(self):
@@ -437,8 +447,9 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
 
         tl.addWidget(self.formats_manager, 0, 6, 3, 1)
 
-        self.splitter = QSplitter(Qt.Horizontal, self)
+        self.splitter = Splitter(Qt.Horizontal, self)
         self.splitter.addWidget(self.cover)
+        self.splitter.frame_resized.connect(self.cover.frame_resized)
         l.addWidget(self.splitter)
         self.tabs[0].gb = gb = QGroupBox(_('Change cover'), self)
         gb.l = l = QGridLayout()
diff --git a/src/calibre/gui2/widgets.py b/src/calibre/gui2/widgets.py
index e5f1c94342..ea0d2570e5 100644
--- a/src/calibre/gui2/widgets.py
+++ b/src/calibre/gui2/widgets.py
@@ -312,6 +312,7 @@ class ImageView(QWidget, ImageDropMixin):
         p.setPen(pen)
         if self.draw_border:
             p.drawRect(target)
+        #p.drawRect(self.rect())
         p.end()
 
 class CoverView(QGraphicsView, ImageDropMixin):

From 261eaad8d275667ff452a6e8d7287bbfb04794b7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 16:34:05 -0600
Subject: [PATCH 27/57] Allow editing of all identifiers in the new metadata
 edit dialog

---
 src/calibre/gui2/metadata/basic_widgets.py | 37 +++++++++++++++-------
 src/calibre/gui2/metadata/single.py        | 28 ++++++++--------
 2 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py
index 995fa082a7..bab9073588 100644
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@@ -902,8 +902,11 @@ class TagsEdit(MultiCompleteLineEdit): # {{{
 
 # }}}
 
-class ISBNEdit(QLineEdit): # {{{
-    LABEL = _('IS&BN:')
+class IdentifiersEdit(QLineEdit): # {{{
+    LABEL = _('I&ds:')
+    BASE_TT = _('Edit the identifiers for this book. '
+            'For example: \n\n%s')%(
+            'isbn:1565927249, doi:10.1000/182, amazon:1565927249')
 
     def __init__(self, parent):
         QLineEdit.__init__(self, parent)
@@ -913,32 +916,44 @@ class ISBNEdit(QLineEdit): # {{{
     @dynamic_property
     def current_val(self):
         def fget(self):
-            return self.pat.sub('', unicode(self.text()).strip())
+            raw = unicode(self.text()).strip()
+            parts = [x.strip() for x in raw.split(',')]
+            ans = {}
+            for x in parts:
+                c = x.split(':')
+                if len(c) == 2:
+                    ans[c[0]] = c[1]
+            return ans
         def fset(self, val):
             if not val:
-                val = ''
-            self.setText(val.strip())
+                val = {}
+            txt = ', '.join(['%s:%s'%(k, v) for k, v in val.iteritems()])
+            self.setText(txt.strip())
         return property(fget=fget, fset=fset)
 
     def initialize(self, db, id_):
-        self.current_val = db.isbn(id_, index_is_id=True)
+        self.current_val = db.get_identifiers(id_, index_is_id=True)
         self.original_val = self.current_val
 
     def commit(self, db, id_):
-        db.set_isbn(id_, self.current_val, notify=False, commit=False)
+        if self.original_val != self.current_val:
+            db.set_identifiers(id_, self.current_val, notify=False, commit=False)
         return True
 
     def validate(self, *args):
-        isbn = self.current_val
-        tt = _('This ISBN number is valid')
+        identifiers = self.current_val
+        isbn = identifiers.get('isbn', '')
+        tt = self.BASE_TT
+        extra = ''
         if not isbn:
             col = 'rgba(0,255,0,0%)'
         elif check_isbn(isbn) is not None:
             col = 'rgba(0,255,0,20%)'
+            extra = '\n\n'+_('This ISBN number is valid')
         else:
             col = 'rgba(255,0,0,20%)'
-            tt = _('This ISBN number is invalid')
-        self.setToolTip(tt)
+            extra = '\n\n' + _('This ISBN number is invalid')
+        self.setToolTip(tt+extra)
         self.setStyleSheet('QLineEdit { background-color: %s }'%col)
 
 # }}}
diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py
index e20c519aa8..70307eb3b1 100644
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@@ -17,10 +17,10 @@ from PyQt4.Qt import (Qt, QVBoxLayout, QHBoxLayout, QWidget, QPushButton,
 
 from calibre.ebooks.metadata import authors_to_string, string_to_authors
 from calibre.gui2 import ResizableDialog, error_dialog, gprefs
-from calibre.gui2.metadata.basic_widgets import TitleEdit, AuthorsEdit, \
-    AuthorSortEdit, TitleSortEdit, SeriesEdit, SeriesIndexEdit, ISBNEdit, \
-    RatingEdit, PublisherEdit, TagsEdit, FormatsManager, Cover, CommentsEdit, \
-    BuddyLabel, DateEdit, PubdateEdit
+from calibre.gui2.metadata.basic_widgets import (TitleEdit, AuthorsEdit,
+    AuthorSortEdit, TitleSortEdit, SeriesEdit, SeriesIndexEdit, IdentifiersEdit,
+    RatingEdit, PublisherEdit, TagsEdit, FormatsManager, Cover, CommentsEdit,
+    BuddyLabel, DateEdit, PubdateEdit)
 from calibre.gui2.custom_column_widgets import populate_metadata_page
 from calibre.utils.config import tweaks
 
@@ -147,8 +147,8 @@ class MetadataSingleDialogBase(ResizableDialog):
         self.tags_editor_button.clicked.connect(self.tags_editor)
         self.basic_metadata_widgets.append(self.tags)
 
-        self.isbn = ISBNEdit(self)
-        self.basic_metadata_widgets.append(self.isbn)
+        self.identifiers = IdentifiersEdit(self)
+        self.basic_metadata_widgets.append(self.identifiers)
 
         self.publisher = PublisherEdit(self)
         self.basic_metadata_widgets.append(self.publisher)
@@ -282,8 +282,8 @@ class MetadataSingleDialogBase(ResizableDialog):
             self.publisher.current_val = mi.publisher
         if not mi.is_null('tags'):
             self.tags.current_val = mi.tags
-        if not mi.is_null('isbn'):
-            self.isbn.current_val = mi.isbn
+        if not mi.is_null('identifiers'):
+            self.identifiers.current_val = mi.identifiers
         if not mi.is_null('pubdate'):
             self.pubdate.current_val = mi.pubdate
         if not mi.is_null('series') and mi.series.strip():
@@ -486,9 +486,9 @@ class MetadataSingleDialog(MetadataSingleDialogBase): # {{{
         create_row2(1, self.rating)
         sto(self.rating, self.tags)
         create_row2(2, self.tags, self.tags_editor_button)
-        sto(self.tags_editor_button, self.isbn)
-        create_row2(3, self.isbn)
-        sto(self.isbn, self.timestamp)
+        sto(self.tags_editor_button, self.identifiers)
+        create_row2(3, self.identifiers)
+        sto(self.identifiers, self.timestamp)
         create_row2(4, self.timestamp, self.timestamp.clear_button)
         sto(self.timestamp.clear_button, self.pubdate)
         create_row2(5, self.pubdate, self.pubdate.clear_button)
@@ -573,9 +573,9 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{
         create_row(8, self.pubdate, self.publisher,
                    button=self.pubdate.clear_button, icon='trash.png')
         create_row(9, self.publisher, self.timestamp)
-        create_row(10, self.timestamp, self.isbn,
+        create_row(10, self.timestamp, self.identifiers,
                    button=self.timestamp.clear_button, icon='trash.png')
-        create_row(11, self.isbn, self.comments)
+        create_row(11, self.identifiers, self.comments)
         tl.addItem(QSpacerItem(1, 1, QSizePolicy.Fixed, QSizePolicy.Expanding),
                    12, 1, 1 ,1)
 
@@ -591,7 +591,7 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{
             sr.setWidget(w)
             gbl.addWidget(sr)
             self.tabs[0].l.addWidget(gb, 0, 1, 1, 1)
-            sto(self.isbn, gb)
+            sto(self.identifiers, gb)
 
         w = QGroupBox(_('&Comments'), tab0)
         sp = QSizePolicy()

From cc0f8f4323a788f7736c02ae2685c3e7ddb760d9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 17:21:16 -0600
Subject: [PATCH 28/57] Switch fetch-ebook-metadata to use the new metadata
 download framework

---
 src/calibre/ebooks/metadata/sources/cli.py    | 79 +++++++++++++++++++
 src/calibre/ebooks/metadata/sources/google.py |  3 +
 src/calibre/linux.py                          |  4 +-
 3 files changed, 84 insertions(+), 2 deletions(-)
 create mode 100644 src/calibre/ebooks/metadata/sources/cli.py

diff --git a/src/calibre/ebooks/metadata/sources/cli.py b/src/calibre/ebooks/metadata/sources/cli.py
new file mode 100644
index 0000000000..d2cc1648f9
--- /dev/null
+++ b/src/calibre/ebooks/metadata/sources/cli.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import sys, textwrap
+from io import BytesIO
+from threading import Event
+
+from calibre import prints
+from calibre.utils.config import OptionParser
+from calibre.ebooks.metadata import string_to_authors
+from calibre.ebooks.metadata.opf2 import metadata_to_opf
+from calibre.ebooks.metadata.sources.base import create_log
+from calibre.ebooks.metadata.sources.identify import identify
+
+def option_parser():
+    parser = OptionParser(textwrap.dedent(
+        '''\
+        %prog [options]
+
+        Fetch book metadata from online sources. You must specify at least one
+        of title, authors or ISBN.
+        '''
+    ))
+    parser.add_option('-t', '--title', help='Book title')
+    parser.add_option('-a', '--authors', help='Book author(s)')
+    parser.add_option('-i', '--isbn', help='Book ISBN')
+    parser.add_option('-v', '--verbose', default=False, action='store_true',
+                      help='Print the log to the console (stderr)')
+    parser.add_option('-o', '--opf', help='Output the metadata in OPF format')
+    parser.add_option('-d', '--timeout', default='30',
+            help='Timeout in seconds. Default is 30')
+
+    return parser
+
+def main(args=sys.argv):
+    parser = option_parser()
+    opts, args = parser.parse_args(args)
+
+    buf = BytesIO()
+    log = create_log(buf)
+    abort = Event()
+
+    authors = []
+    if opts.authors:
+        authors = string_to_authors(opts.authors)
+
+    identifiers = {}
+    if opts.isbn:
+        identifiers['isbn'] = opts.isbn
+
+    results = identify(log, abort, title=opts.title, authors=authors,
+            identifiers=identifiers, timeout=int(opts.timeout))
+
+    log = buf.getvalue()
+
+    if not results:
+        print (log, file=sys.stderr)
+        prints('No results found', file=sys.stderr)
+        raise SystemExit(1)
+
+    result = results[0]
+    result = (metadata_to_opf(result) if opts.opf else
+                    unicode(result).encode('utf-8'))
+
+    if opts.verbose:
+        print (log, file=sys.stderr)
+
+    print (result)
+
+    return 0
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 21c99fdf46..c4e2f9fe24 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -271,6 +271,9 @@ class GoogleBooks(Source):
             identifiers={}, timeout=30):
         query = self.create_query(log, title=title, authors=authors,
                 identifiers=identifiers)
+        if not query:
+            log.error('Insufficient metadata to construct query')
+            return
         br = self.browser
         try:
             raw = br.open_novisit(query, timeout=timeout).read()
diff --git a/src/calibre/linux.py b/src/calibre/linux.py
index 64c363b8ba..5c80df20df 100644
--- a/src/calibre/linux.py
+++ b/src/calibre/linux.py
@@ -30,7 +30,7 @@ entry_points = {
              'calibre-customize  = calibre.customize.ui:main',
              'calibre-complete   = calibre.utils.complete:main',
              'pdfmanipulate      = calibre.ebooks.pdf.manipulate.cli:main',
-             'fetch-ebook-metadata = calibre.ebooks.metadata.fetch:main',
+             'fetch-ebook-metadata = calibre.ebooks.metadata.sources.cli:main',
              'epub-fix           = calibre.ebooks.epub.fix.main:main',
              'calibre-smtp = calibre.utils.smtp:main',
         ],
@@ -183,7 +183,7 @@ class PostInstall:
             from calibre.ebooks.lrf.lrfparser import option_parser as lrf2lrsop
             from calibre.gui2.lrf_renderer.main import option_parser as lrfviewerop
             from calibre.gui2.viewer.main import option_parser as viewer_op
-            from calibre.ebooks.metadata.fetch import option_parser as fem_op
+            from calibre.ebooks.metadata.sources.cli import option_parser as fem_op
             from calibre.gui2.main import option_parser as guiop
             from calibre.utils.smtp import option_parser as smtp_op
             from calibre.library.server.main import option_parser as serv_op

From 265eabf1a613fcdc3651631fd3f9589bf7d4e7be Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 5 Apr 2011 21:59:11 -0400
Subject: [PATCH 29/57] HTMLZ Output: Rewrite links via oeb.base.rewrite_links
 function.

---
 src/calibre/ebooks/htmlz/oeb2html.py | 112 +++++++++++++++------------
 1 file changed, 64 insertions(+), 48 deletions(-)

diff --git a/src/calibre/ebooks/htmlz/oeb2html.py b/src/calibre/ebooks/htmlz/oeb2html.py
index af5867356a..7d915bcfcb 100644
--- a/src/calibre/ebooks/htmlz/oeb2html.py
+++ b/src/calibre/ebooks/htmlz/oeb2html.py
@@ -12,10 +12,13 @@ Transform OEB content into a single (more or less) HTML file.
 
 import os
 
-from urlparse import urlparse, urldefrag
+from functools import partial
+from lxml import html
+from urlparse import urldefrag
 
 from calibre import prepare_string_for_xml
-from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace
+from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace,\
+    OEB_IMAGES, XLINK, rewrite_links
 from calibre.ebooks.oeb.stylizer import Stylizer
 from calibre.utils.logging import default_log
 
@@ -40,6 +43,8 @@ class OEB2HTML(object):
         self.opts = opts
         self.links = {}
         self.images = {}
+        self.base_hrefs = [item.href for item in oeb_book.spine]
+        self.map_resources(oeb_book)
 
         return self.mlize_spine(oeb_book)
 
@@ -47,6 +52,8 @@ class OEB2HTML(object):
         output = [u'<html><body><head><meta http-equiv="Content-Type" content="text/html;charset=utf-8" /></head>']
         for item in oeb_book.spine:
             self.log.debug('Converting %s to HTML...' % item.href)
+            self.rewrite_ids(item.data, item)
+            rewrite_links(item.data, partial(self.rewrite_link, page=item))
             stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
             output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
             output.append('\n\n')
@@ -56,41 +63,61 @@ class OEB2HTML(object):
     def dump_text(self, elem, stylizer, page):
         raise NotImplementedError
 
-    def get_link_id(self, href, aid):
-        aid = '%s#%s' % (href, aid)
-        if aid not in self.links:
-            self.links[aid] = 'calibre_link-%s' % len(self.links.keys())
-        return self.links[aid]
+    def get_link_id(self, href, id=''):
+        if id:
+            href += '#%s' % id
+        if href not in self.links:
+            self.links[href] = '#calibre_link-%s' % len(self.links.keys())
+        return self.links[href]
 
-    def rewrite_link(self, tag, attribs, page):
-        # Rewrite ids.
-        if 'id' in attribs:
-            attribs['id'] = self.get_link_id(page.href, attribs['id'])
-        # Rewrite links.
-        if tag == 'a':
-            href = page.abshref(attribs['href'])
-            if self.url_is_relative(href):
-                href, id = urldefrag(href)
-                href = '#%s' % self.get_link_id(href, id)
-                attribs['href'] = href
-        return attribs
-
-    def rewrite_image(self, tag, attribs, page):
-        if tag == 'img':
-            src = attribs.get('src', None)
-            if src:
-                src = page.abshref(src)
-                if src not in self.images:
-                    ext = os.path.splitext(src)[1]
+    def map_resources(self, oeb_book):
+        for item in oeb_book.manifest:
+            if item.media_type in OEB_IMAGES:
+                if item.href not in self.images:
+                    ext = os.path.splitext(item.href)[1]
                     fname = '%s%s' % (len(self.images), ext)
                     fname = fname.zfill(10)
-                    self.images[src] = fname
-                attribs['src'] = 'images/%s' % self.images[src]
-        return attribs
-
-    def url_is_relative(self, url):
-        o = urlparse(url)
-        return False if o.scheme else True
+                    self.images[item.href] = fname
+            if item in oeb_book.spine:
+                self.get_link_id(item.href)
+                root = item.data.find(XHTML('body'))
+                link_attrs = set(html.defs.link_attrs)
+                link_attrs.add(XLINK('href'))
+                for el in root.iter():
+                    attribs = el.attrib
+                    try:
+                        if not isinstance(el.tag, basestring):
+                            continue
+                    except UnicodeDecodeError:
+                        continue
+                    for attr in attribs:
+                        if attr in link_attrs:
+                            href = item.abshref(attribs[attr])
+                            href, id = urldefrag(href)
+                            if href in self.base_hrefs:
+                                self.get_link_id(href, id)
+    
+    def rewrite_link(self, url, page=None):
+        if not page:
+            return url
+        abs_url = page.abshref(url)
+        if abs_url in self.images:
+            return 'images/%s' % self.images[abs_url]
+        if abs_url in self.links:
+            return self.links[abs_url]
+        return url
+    
+    def rewrite_ids(self, root, page):
+        for el in root.iter():
+            try:
+                tag = el.tag
+            except UnicodeDecodeError:
+                continue
+            if tag == XHTML('body'):
+                el.attrib['id'] = self.get_link_id(page.href)[1:]
+                continue
+            if 'id' in el.attrib:
+                el.attrib['id'] = self.get_link_id(page.href, el.attrib['id'])[1:]
 
     def get_css(self, oeb_book):
         css = u''
@@ -127,13 +154,9 @@ class OEB2HTMLNoCSSizer(OEB2HTML):
         tags = []
         tag = barename(elem.tag)
         attribs = elem.attrib
-        
-        attribs = self.rewrite_link(tag, attribs, page)
-        attribs = self.rewrite_image(tag, attribs, page)
-        
+
         if tag == 'body':
             tag = 'div'
-            attribs['id'] = self.get_link_id(page.href, '')
         tags.append(tag)
 
         # Ignore anything that is set to not be displayed.
@@ -215,14 +238,10 @@ class OEB2HTMLInlineCSSizer(OEB2HTML):
         tags = []
         tag = barename(elem.tag)
         attribs = elem.attrib
-        
-        attribs = self.rewrite_link(tag, attribs, page)
-        attribs = self.rewrite_image(tag, attribs, page)
 
         style_a = '%s' % style
         if tag == 'body':
             tag = 'div'
-            attribs['id'] = self.get_link_id(page.href, '')
             if not style['page-break-before'] == 'always':
                 style_a = 'page-break-before: always;' + ' ' if style_a else '' + style_a
         tags.append(tag)
@@ -277,6 +296,8 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
         output = []
         for item in oeb_book.spine:
             self.log.debug('Converting %s to HTML...' % item.href)
+            self.rewrite_ids(item.data, item)
+            rewrite_links(item.data, partial(self.rewrite_link, page=item))
             stylizer = Stylizer(item.data, item.href, oeb_book, self.opts)
             output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
             output.append('\n\n')
@@ -304,17 +325,12 @@ class OEB2HTMLClassCSSizer(OEB2HTML):
 
         # Setup our variables.
         text = ['']
-        #style = stylizer.style(elem)
         tags = []
         tag = barename(elem.tag)
         attribs = elem.attrib
 
-        attribs = self.rewrite_link(tag, attribs, page)
-        attribs = self.rewrite_image(tag, attribs, page)
-
         if tag == 'body':
             tag = 'div'
-            attribs['id'] = self.get_link_id(page.href, '')
         tags.append(tag)
 
         # Remove attributes we won't want.

From 739609210ef60dc4d0bb15fa0253d0c1b7940081 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Tue, 5 Apr 2011 22:12:50 -0400
Subject: [PATCH 30/57] ...

---
 src/calibre/ebooks/htmlz/oeb2html.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/htmlz/oeb2html.py b/src/calibre/ebooks/htmlz/oeb2html.py
index 7d915bcfcb..b8a6362a99 100644
--- a/src/calibre/ebooks/htmlz/oeb2html.py
+++ b/src/calibre/ebooks/htmlz/oeb2html.py
@@ -88,7 +88,7 @@ class OEB2HTML(object):
                     try:
                         if not isinstance(el.tag, basestring):
                             continue
-                    except UnicodeDecodeError:
+                    except:
                         continue
                     for attr in attribs:
                         if attr in link_attrs:

From 458727a5600af8683101e9362eca9c9a003462f8 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 21:35:26 -0600
Subject: [PATCH 31/57] ...

---
 src/calibre/ebooks/metadata/book/base.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py
index 328ab7be26..ff22cd3608 100644
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@@ -125,7 +125,10 @@ class Metadata(object):
         _data = object.__getattribute__(self, '_data')
         if field in TOP_LEVEL_IDENTIFIERS:
             field, val = self._clean_identifier(field, val)
-            _data['identifiers'].update({field: val})
+            identifiers = _data['identifiers']
+            identifiers.pop(field, None)
+            if val:
+                identifiers[field] = val
         elif field == 'identifiers':
             if not val:
                 val = copy.copy(NULL_VALUES.get('identifiers', None))
@@ -224,8 +227,7 @@ class Metadata(object):
         identifiers = object.__getattribute__(self,
             '_data')['identifiers']
 
-        if not val and typ in identifiers:
-            identifiers.pop(typ)
+        identifiers.pop(typ, None)
         if val:
             identifiers[typ] = val
 
@@ -647,7 +649,7 @@ class Metadata(object):
             fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
         if self.series:
             fmt('Series', self.series + ' #%s'%self.format_series_index())
-        if self.language:
+        if not self.is_null('language'):
             fmt('Language', self.language)
         if self.rating is not None:
             fmt('Rating', self.rating)

From afebdabbf140c14a9ee61dd935b659db0dc5e59e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 23:11:43 -0600
Subject: [PATCH 32/57] save_cover_data_to: Fix return_data returning Image
 object instead of bytes when an Image object is passed in

---
 src/calibre/utils/magick/draw.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/utils/magick/draw.py b/src/calibre/utils/magick/draw.py
index 42659d70cc..fdce30177a 100644
--- a/src/calibre/utils/magick/draw.py
+++ b/src/calibre/utils/magick/draw.py
@@ -92,12 +92,12 @@ def save_cover_data_to(data, path, bgcolor='#ffffff', resize_to=None,
     ret = None
     if return_data:
         ret = data
-        if changed:
+        if changed or isinstance(ret, Image):
             if hasattr(img, 'set_compression_quality') and fmt == 'jpg':
                 img.set_compression_quality(compression_quality)
             ret = img.export(fmt)
     else:
-        if changed:
+        if changed or isinstance(ret, Image):
             if hasattr(img, 'set_compression_quality') and fmt == 'jpg':
                 img.set_compression_quality(compression_quality)
             img.save(path)

From 2828ba527699ef3911281f378ea608248c79a52e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 23:12:18 -0600
Subject: [PATCH 33/57] ...

---
 src/calibre/ebooks/metadata/sources/identify.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index b04a697ed8..77391bac6b 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -354,10 +354,10 @@ if __name__ == '__main__': # tests {{{
                     exact=True), authors_test(['Dan Brown'])]
             ),
 
-            ( # No specific problems
-                {'identifiers':{'isbn': '0743273567'}},
-                [title_test('The great gatsby', exact=True),
-                    authors_test(['Francis Scott Fitzgerald'])]
+            ( # No ISBN
+                {'title':'Justine', 'authors':['Durrel']},
+                [title_test('Justine', exact=True),
+                    authors_test(['Lawrence Durrel'])]
             ),
 
             (  # A newer book

From 6773cf71af98e80ea04d951f043b08f9eae508ab Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Tue, 5 Apr 2011 23:16:59 -0600
Subject: [PATCH 34/57] Add cover downloading to the new fetch-ebook-metadata

---
 src/calibre/ebooks/metadata/sources/amazon.py |   5 +-
 src/calibre/ebooks/metadata/sources/base.py   |   9 +-
 src/calibre/ebooks/metadata/sources/cli.py    |  25 ++-
 src/calibre/ebooks/metadata/sources/covers.py | 178 ++++++++++++++++++
 src/calibre/ebooks/metadata/sources/google.py |  20 +-
 .../ebooks/metadata/sources/openlibrary.py    |   2 +-
 6 files changed, 224 insertions(+), 15 deletions(-)
 create mode 100644 src/calibre/ebooks/metadata/sources/covers.py

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index d1c8f24da6..d48f502c29 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -279,7 +279,7 @@ class Worker(Thread): # Get details {{{
 
 class Amazon(Source):
 
-    name = 'Amazon'
+    name = 'Amazon Metadata'
     description = _('Downloads metadata from Amazon')
 
     capabilities = frozenset(['identify', 'cover'])
@@ -493,9 +493,10 @@ class Amazon(Source):
         if abort.is_set():
             return
         br = self.browser
+        log('Downloading cover from:', cached_url)
         try:
             cdata = br.open_novisit(cached_url, timeout=timeout).read()
-            result_queue.put(cdata)
+            result_queue.put((self, cdata))
         except:
             log.exception('Failed to download cover from:', cached_url)
     # }}}
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 30b804a76e..33232f25ab 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -22,6 +22,12 @@ msprefs.defaults['txt_comments'] = False
 msprefs.defaults['ignore_fields'] = []
 msprefs.defaults['max_tags'] = 20
 msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds
+msprefs.defaults['wait_after_first_cover_result'] = 60 # seconds
+
+# Google covers are often poor quality (scans/errors) but they have high
+# resolution, so they trump covers from better sources. So make sure they
+# are only used if no other covers are found.
+msprefs.defaults['cover_priorities'] = {'Google':2}
 
 def create_log(ostream=None):
     log = ThreadSafeLog(level=ThreadSafeLog.DEBUG)
@@ -340,7 +346,8 @@ class Source(Plugin):
             title=None, authors=None, identifiers={}, timeout=30):
         '''
         Download a cover and put it into result_queue. The parameters all have
-        the same meaning as for :meth:`identify`.
+        the same meaning as for :meth:`identify`. Put (self, cover_data) into
+        result_queue.
 
         This method should use cached cover URLs for efficiency whenever
         possible. When cached data is not present, most plugins simply call
diff --git a/src/calibre/ebooks/metadata/sources/cli.py b/src/calibre/ebooks/metadata/sources/cli.py
index d2cc1648f9..b39da07d53 100644
--- a/src/calibre/ebooks/metadata/sources/cli.py
+++ b/src/calibre/ebooks/metadata/sources/cli.py
@@ -13,10 +13,13 @@ from threading import Event
 
 from calibre import prints
 from calibre.utils.config import OptionParser
+from calibre.utils.magick.draw import save_cover_data_to
 from calibre.ebooks.metadata import string_to_authors
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.metadata.sources.base import create_log
 from calibre.ebooks.metadata.sources.identify import identify
+from calibre.ebooks.metadata.sources.covers import download_cover
+
 
 def option_parser():
     parser = OptionParser(textwrap.dedent(
@@ -33,6 +36,8 @@ def option_parser():
     parser.add_option('-v', '--verbose', default=False, action='store_true',
                       help='Print the log to the console (stderr)')
     parser.add_option('-o', '--opf', help='Output the metadata in OPF format')
+    parser.add_option('-c', '--cover',
+            help='Specify a filename. The cover, if available, will be saved to it')
     parser.add_option('-d', '--timeout', default='30',
             help='Timeout in seconds. Default is 30')
 
@@ -57,14 +62,26 @@ def main(args=sys.argv):
     results = identify(log, abort, title=opts.title, authors=authors,
             identifiers=identifiers, timeout=int(opts.timeout))
 
-    log = buf.getvalue()
-
     if not results:
         print (log, file=sys.stderr)
         prints('No results found', file=sys.stderr)
         raise SystemExit(1)
-
     result = results[0]
+
+    cf = None
+    if opts.cover and results:
+        cover = download_cover(log, title=opts.title, authors=authors,
+                identifiers=result.identifiers, timeout=int(opts.timeout))
+        if cover is None:
+            prints('No cover found', file=sys.stderr)
+        else:
+            save_cover_data_to(cover[-1], opts.cover)
+            result.cover = cf = opts.cover
+
+
+    log = buf.getvalue()
+
+
     result = (metadata_to_opf(result) if opts.opf else
                     unicode(result).encode('utf-8'))
 
@@ -72,6 +89,8 @@ def main(args=sys.argv):
         print (log, file=sys.stderr)
 
     print (result)
+    if not opts.opf:
+        prints('Cover               :', cf)
 
     return 0
 
diff --git a/src/calibre/ebooks/metadata/sources/covers.py b/src/calibre/ebooks/metadata/sources/covers.py
new file mode 100644
index 0000000000..46b278397c
--- /dev/null
+++ b/src/calibre/ebooks/metadata/sources/covers.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+import time
+from Queue import Queue, Empty
+from threading import Thread, Event
+from io import BytesIO
+
+from calibre.customize.ui import metadata_plugins
+from calibre.ebooks.metadata.sources.base import msprefs, create_log
+from calibre.utils.magick.draw import Image, save_cover_data_to
+
+class Worker(Thread):
+    ''' Thread that runs a single plugin's download_cover() with its own log '''
+    def __init__(self, plugin, abort, title, authors, identifiers, timeout, rq):
+        Thread.__init__(self)
+        self.daemon = True
+
+        self.plugin = plugin
+        self.abort = abort
+        self.buf = BytesIO() # backing store for this worker's private log
+        self.log = create_log(self.buf)
+        self.title, self.authors, self.identifiers = (title, authors,
+                identifiers)
+        self.timeout, self.rq = timeout, rq
+        self.time_spent = None # set at the end of run()
+
+    def run(self):
+        start_time = time.time()
+        if not self.abort.is_set(): # skip the download if already aborted
+            try:
+                self.plugin.download_cover(self.log, self.rq, self.abort,
+                    title=self.title, authors=self.authors,
+                    identifiers=self.identifiers, timeout=self.timeout)
+            except: # record the failure in the worker log instead of raising
+                self.log.exception('Failed to download cover from',
+                        self.plugin.name)
+        self.time_spent = time.time() - start_time
+
+def is_worker_alive(workers): # True if any thread in workers is still running
+    for w in workers:
+        if w.is_alive():
+            return True
+    return False
+
+def process_result(log, result):
+    plugin, data = result # (plugin, raw image data) as queued by the plugin
+    try:
+        im = Image()
+        im.load(data)
+        im.trim(10)
+        width, height = im.size
+        fmt = im.format
+        # Covers under 50px in either dimension are treated as invalid
+        if width < 50 or height < 50:
+            raise ValueError('Image too small')
+        data = save_cover_data_to(im, '/cover.jpg', return_data=True)
+    except:
+        log.exception('Invalid cover from', plugin.name)
+        return None # invalid covers yield None
+    return (plugin, width, height, fmt, data)
+
+def run_download(log, results, abort,
+        title=None, authors=None, identifiers={}, timeout=30):
+    '''
+    Run the cover download, putting results into the queue :param:`results`.
+    After the first valid cover arrives, other plugins are given
+    msprefs['wait_after_first_cover_result'] seconds before abort is set.
+
+    Each result is a tuple of the form:
+        (plugin, width, height, fmt, bytes)
+    '''
+    plugins = list(metadata_plugins(['cover']))
+
+    rq = Queue()
+    workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
+            in plugins]
+    for w in workers:
+        w.start()
+
+    first_result_at = None
+    wait_time = msprefs['wait_after_first_cover_result']
+    found_results = {}
+
+    while True:
+        time.sleep(0.1)
+        try:
+            x = rq.get_nowait()
+            result = process_result(log, x)
+            if result is not None:
+                results.put(result)
+                found_results[result[0]] = result
+                if first_result_at is None: # start the clock on the first cover
+                    first_result_at = time.time()
+        except Empty:
+            pass
+
+        if not is_worker_alive(workers):
+            break
+
+        if first_result_at is not None and time.time() - first_result_at > wait_time:
+            log('Not waiting for any more results')
+            abort.set()
+
+        if abort.is_set():
+            break
+    # Drain results that were queued after the polling loop exited
+    while True:
+        try:
+            x = rq.get_nowait()
+            result = process_result(log, x)
+            if result is not None:
+                results.put(result)
+                found_results[result[0]] = result
+        except Empty:
+            break
+    # Append a per-plugin summary and each worker's private log to the main log
+    for w in workers:
+        wlog = w.buf.getvalue().strip()
+        log('\n'+'*'*30, w.plugin.name, 'Covers', '*'*30)
+        log('Request extra headers:', w.plugin.browser.addheaders)
+        if w.plugin in found_results:
+            result = found_results[w.plugin]
+            log('Downloaded cover:', '%dx%d'%(result[1], result[2]))
+        else:
+            log('Failed to download valid cover')
+        if w.time_spent is None:
+            log('Download aborted')
+        else:
+            log('Took', w.time_spent, 'seconds')
+        if wlog:
+            log(wlog)
+        log('\n'+'*'*80)
+
+
+def download_cover(log,
+        title=None, authors=None, identifiers={}, timeout=30):
+    '''
+    Synchronous cover download. Returns the "best" cover as per user
+    prefs/cover resolution.
+
+    The returned cover is a tuple: (plugin, width, height, fmt, data)
+
+    Returns None if no cover is found.
+    '''
+    rq = Queue()
+    abort = Event()
+
+    run_download(log, rq, abort, title=title, authors=authors,
+            identifiers=identifiers, timeout=timeout)
+
+    results = []
+    # Collect everything run_download() put into the queue
+    while True:
+        try:
+            results.append(rq.get_nowait())
+        except Empty:
+            break
+
+    cp = msprefs['cover_priorities']
+    # Sort key: lower priority number first (default 1), then larger area
+    def keygen(result):
+        plugin, width, height, fmt, data = result
+        return (cp.get(plugin.name, 1), 1/(width*height))
+
+    results.sort(key=keygen)
+
+    return results[0] if results else None
+
+
+
+
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index c4e2f9fe24..47cfb823bb 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -145,15 +145,18 @@ def to_metadata(browser, log, entry_, timeout): # {{{
             log.exception('Failed to parse rating')
 
     # Cover
-    mi.has_google_cover = len(extra.xpath(
-        '//*[@rel="http://schemas.google.com/books/2008/thumbnail"]')) > 0
+    mi.has_google_cover = None
+    for x in extra.xpath(
+            '//*[@href and @rel="http://schemas.google.com/books/2008/thumbnail"]'):
+        mi.has_google_cover = x.get('href')
+        break
 
     return mi
 # }}}
 
 class GoogleBooks(Source):
 
-    name = 'Google Books'
+    name = 'Google'
     description = _('Downloads metadata from Google Books')
 
     capabilities = frozenset(['identify', 'cover'])
@@ -213,7 +216,7 @@ class GoogleBooks(Source):
             results.sort(key=self.identify_results_keygen(
                 title=title, authors=authors, identifiers=identifiers))
             for mi in results:
-                cached_url = self.cover_url_from_identifiers(mi.identifiers)
+                cached_url = self.get_cached_cover_url(mi.identifiers)
                 if cached_url is not None:
                     break
         if cached_url is None:
@@ -223,9 +226,10 @@ class GoogleBooks(Source):
         if abort.is_set():
             return
         br = self.browser
+        log('Downloading cover from:', cached_url)
         try:
             cdata = br.open_novisit(cached_url, timeout=timeout).read()
-            result_queue.put(cdata)
+            result_queue.put((self, cdata))
         except:
             log.exception('Failed to download cover from:', cached_url)
 
@@ -254,9 +258,9 @@ class GoogleBooks(Source):
                     goog = ans.identifiers['google']
                     for isbn in getattr(ans, 'all_isbns', []):
                         self.cache_isbn_to_identifier(isbn, goog)
-                        if ans.has_google_cover:
-                            self.cache_identifier_to_cover_url(goog,
-                                    self.GOOGLE_COVER%goog)
+                    if ans.has_google_cover:
+                        self.cache_identifier_to_cover_url(goog,
+                                self.GOOGLE_COVER%goog)
                     self.clean_downloaded_metadata(ans)
                     result_queue.put(ans)
             except:
diff --git a/src/calibre/ebooks/metadata/sources/openlibrary.py b/src/calibre/ebooks/metadata/sources/openlibrary.py
index 1fcb33e35f..19b8747265 100644
--- a/src/calibre/ebooks/metadata/sources/openlibrary.py
+++ b/src/calibre/ebooks/metadata/sources/openlibrary.py
@@ -26,7 +26,7 @@ class OpenLibrary(Source):
         br = self.browser
         try:
             ans = br.open_novisit(self.OPENLIBRARY%isbn, timeout=timeout).read()
-            result_queue.put(ans)
+            result_queue.put((self, ans))
         except Exception as e:
             if callable(getattr(e, 'getcode', None)) and e.getcode() == 404:
                 log.error('No cover for ISBN: %r found'%isbn)

From 62b1ae917608c47571180ff644ba81bdd4438509 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 00:04:27 -0600
Subject: [PATCH 35/57] identify(): Merge results with identical title and
 authors that aren't matched by xISBN

---
 src/calibre/ebooks/metadata/sources/cli.py    |  2 +-
 .../ebooks/metadata/sources/identify.py       | 53 ++++++++++++++++---
 2 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/cli.py b/src/calibre/ebooks/metadata/sources/cli.py
index b39da07d53..58042da2bf 100644
--- a/src/calibre/ebooks/metadata/sources/cli.py
+++ b/src/calibre/ebooks/metadata/sources/cli.py
@@ -89,7 +89,7 @@ def main(args=sys.argv):
         print (log, file=sys.stderr)
 
     print (result)
-    if not opts.opf:
+    if not opts.opf and opts.cover:
         prints('Cover               :', cf)
 
     return 0
diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index 77391bac6b..cbc12b6167 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -20,6 +20,7 @@ from calibre.ebooks.metadata.xisbn import xisbn
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.utils.date import utc_tz
 from calibre.utils.html2text import html2text
+from calibre.utils.icu import lower
 
 # Download worker {{{
 class Worker(Thread):
@@ -97,11 +98,45 @@ class ISBNMerge(object):
         if has_isbn_result:
             self.merge_isbn_results()
         else:
-            self.results = sorted(self.isbnless_results,
+            results = sorted(self.isbnless_results,
                     key=attrgetter('relevance_in_source'))
+            # Pick only the most relevant result from each source
+            self.results = []
+            seen = set()
+            for result in results:
+                if result.identify_plugin not in seen:
+                    seen.add(result.identify_plugin)
+                    self.results.append(result)
+                    result.average_source_relevance = \
+                        result.relevance_in_source
+
+        self.merge_metadata_results()
 
         return self.results
 
+    def merge_metadata_results(self):
+        ' Merge results with identical title and authors '
+        groups = {}
+        for result in self.results:
+            title = lower(result.title if result.title else '')
+            key = (title, tuple([lower(x) for x in result.authors]))
+            if key not in groups:
+                groups[key] = []
+            groups[key].append(result)
+
+        if len(groups) != len(self.results):
+            self.results = []
+            for rgroup in groups.itervalues():
+                rel = [r.average_source_relevance for r in rgroup]
+                if len(rgroup) > 1:
+                    result = self.merge(rgroup, None, do_asr=False)
+                    result.average_source_relevance = sum(rel)/len(rel)
+                else:
+                    result = rgroup[0]
+                self.results.append(result)
+
+        self.results.sort(key=attrgetter('average_source_relevance'))
+
     def merge_isbn_results(self):
         self.results = []
         for min_year, results in self.pools.itervalues():
@@ -122,7 +157,7 @@ class ISBNMerge(object):
         values = [getattr(x, attr) for x in results if not x.is_null(attr)]
         return values[0] if values else null_value
 
-    def merge(self, results, min_year):
+    def merge(self, results, min_year, do_asr=True):
         ans = Metadata(_('Unknown'))
 
         # We assume the shortest title has the least cruft in it
@@ -185,7 +220,8 @@ class ISBNMerge(object):
         # Merge any other fields with no special handling (random merge)
         touched_fields = set()
         for r in results:
-            touched_fields |= r.identify_plugin.touched_fields
+            if hasattr(r, 'identify_plugin'):
+                touched_fields |= r.identify_plugin.touched_fields
 
         for f in touched_fields:
             if f.startswith('identifier:') or not ans.is_null(f):
@@ -193,9 +229,10 @@ class ISBNMerge(object):
             setattr(ans, f, self.random_merge(f, results,
                 null_value=getattr(ans, f)))
 
-        avg = [x.relevance_in_source for x in results]
-        avg = sum(avg)/len(avg)
-        ans.average_source_relevance = avg
+        if do_asr:
+            avg = [x.relevance_in_source for x in results]
+            avg = sum(avg)/len(avg)
+            ans.average_source_relevance = avg
 
         return ans
 
@@ -210,7 +247,8 @@ def merge_identify_results(result_map, log):
 
 # }}}
 
-def identify(log, abort, title=None, authors=None, identifiers={}, timeout=30):
+def identify(log, abort, # {{{
+        title=None, authors=None, identifiers={}, timeout=30):
     start_time = time.time()
     plugins = list(metadata_plugins(['identify']))
 
@@ -322,6 +360,7 @@ def identify(log, abort, title=None, authors=None, identifiers={}, timeout=30):
         r.tags = r.tags[:max_tags]
 
     return results
+# }}}
 
 if __name__ == '__main__': # tests {{{
     # To run these test use: calibre-debug -e

From 6059a77d86de5f4433bf4a23fb595a4bf9df9113 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 00:14:44 -0600
Subject: [PATCH 36/57] Fix author name casing algorithm

---
 src/calibre/ebooks/metadata/sources/base.py | 26 +++++++++++++++++----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 33232f25ab..77cc8eaba8 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -15,6 +15,7 @@ from calibre.customize import Plugin
 from calibre.utils.logging import ThreadSafeLog, FileStream
 from calibre.utils.config import JSONConfig
 from calibre.utils.titlecase import titlecase
+from calibre.utils.icu import capitalize, lower
 from calibre.ebooks.metadata import check_isbn
 
 msprefs = JSONConfig('metadata_sources/global.json')
@@ -107,6 +108,25 @@ def get_cached_cover_urls(mi):
         if url:
             yield (p, url)
 
+def cap_author_token(token):
+    if lower(token) in ('von', 'de', 'el', 'van'):
+        return lower(token)
+    return capitalize(token)
+
+def fixauthors(authors):
+    if not authors:
+        return authors
+    ans = []
+    for x in authors:
+        ans.append(' '.join(map(cap_author_token, x.split())))
+    return ans
+
+def fixcase(x):
+    if x:
+        x = titlecase(x)
+    return x
+
+
 
 class Source(Plugin):
 
@@ -259,13 +279,9 @@ class Source(Plugin):
         before putting the Metadata object into result_queue. You can of
         course, use a custom algorithm suited to your metadata source.
         '''
-        def fixcase(x):
-            if x:
-                x = titlecase(x)
-            return x
         if mi.title:
             mi.title = fixcase(mi.title)
-        mi.authors = list(map(fixcase, mi.authors))
+        mi.authors = fixauthors(mi.authors)
         mi.tags = list(map(fixcase, mi.tags))
         mi.isbn = check_isbn(mi.isbn)
 

From ea2a5c7537457bc4daf8cddfd9eadbc899374dde Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 00:15:27 -0600
Subject: [PATCH 37/57] ...

---
 src/calibre/ebooks/metadata/sources/base.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 77cc8eaba8..ac95860f66 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -109,7 +109,7 @@ def get_cached_cover_urls(mi):
             yield (p, url)
 
 def cap_author_token(token):
-    if lower(token) in ('von', 'de', 'el', 'van'):
+    if lower(token) in ('von', 'de', 'el', 'van', 'le'):
         return lower(token)
     return capitalize(token)
 

From 41815e218ae32d7c8faa60244773e9078219e5da Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 00:26:46 -0600
Subject: [PATCH 38/57] Normalize author names with run together initials

---
 src/calibre/ebooks/metadata/sources/base.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index ac95860f66..fe57124cae 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -109,8 +109,12 @@ def get_cached_cover_urls(mi):
             yield (p, url)
 
 def cap_author_token(token):
-    if lower(token) in ('von', 'de', 'el', 'van', 'le'):
-        return lower(token)
+    lt = lower(token)
+    if lt in ('von', 'de', 'el', 'van', 'le'):
+        return lt
+    if re.match(r'([a-z]\.){2,}$', lt) is not None:
+        parts = token.split('.')
+        return '. '.join(map(capitalize, parts)).strip()
     return capitalize(token)
 
 def fixauthors(authors):

From 67eb873eab8b2ef2c5e512f9cc6aef519b47994e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 00:28:24 -0600
Subject: [PATCH 39/57] ...

---
 src/calibre/ebooks/metadata/sources/base.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index fe57124cae..faa7420081 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -113,6 +113,7 @@ def cap_author_token(token):
     if lt in ('von', 'de', 'el', 'van', 'le'):
         return lt
     if re.match(r'([a-z]\.){2,}$', lt) is not None:
+        # Normalize tokens of the form J.K. to J. K.
         parts = token.split('.')
         return '. '.join(map(capitalize, parts)).strip()
     return capitalize(token)

From 97c5b041a3fcaa2632f42973b278cb1f42e78118 Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 6 Apr 2011 08:11:27 +0100
Subject: [PATCH 40/57] When drag & dropping onto the tag browser, set the
 current node to the one dropped upon.

---
 src/calibre/gui2/tag_view.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/calibre/gui2/tag_view.py b/src/calibre/gui2/tag_view.py
index f86e261443..73f423981a 100644
--- a/src/calibre/gui2/tag_view.py
+++ b/src/calibre/gui2/tag_view.py
@@ -985,6 +985,7 @@ class TagsModel(QAbstractItemModel): # {{{
     def do_drop_from_library(self, md, action, row, column, parent):
         idx = parent
         if idx.isValid():
+            self.tags_view.setCurrentIndex(idx)
             node = self.data(idx, Qt.UserRole)
             if node.type == TagTreeItem.TAG:
                 fm = self.db.metadata_for_field(node.tag.category)

From 049776de273cd8bb77fd81887cad0eeb008bc930 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 08:24:25 -0600
Subject: [PATCH 41/57] ...

---
 src/calibre/manual/faq.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index 97ef32e9d4..f48fa9dc16 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -99,7 +99,8 @@ We just need some information from you:
     device.
 
 Once you send us the output for a particular operating system, support for the device in that operating system
-will appear in the next release of |app|.
+will appear in the next release of |app|. To send us the output, open a bug report and attach the output to it.
+See http://calibre-ebook.com/bugs.
 
 My device is not being detected by |app|?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

From 593f3aaf0a6f08bbab384a66d0f4af9bf074d397 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 08:44:50 -0600
Subject: [PATCH 42/57] Support for Motorola Atrix

---
 src/calibre/devices/android/driver.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index 54e4979524..7702a7caf0 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -36,7 +36,9 @@ class ANDROID(USBMS):
             # Motorola
             0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
                        0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
-                       0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216] },
+                       0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216],
+                       0x7086 : [0x0226],
+                     },
 
             # Sony Ericsson
             0xfce : { 0xd12e : [0x0100]},
@@ -101,7 +103,8 @@ class ANDROID(USBMS):
             'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
             'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
             'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
-            '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2']
+            '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
+            'MB860']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
             'A70S', 'A101IT', '7']

From 504ef950568ab8fcdd0b04c7af5de78ffd4ab0a1 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 10:03:46 -0600
Subject: [PATCH 43/57] When dealing with ZIP/RAR archives, use the file header
 rather than the file extension to determine the file type, when possible. This
 fixes the common case of CBZ files being actually cbr files and vice versa

---
 src/calibre/__init__.py           | 21 ++++++++++++++++-----
 src/calibre/customize/builtins.py |  8 ++++++++
 2 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/src/calibre/__init__.py b/src/calibre/__init__.py
index 1799072045..2f457bf2bc 100644
--- a/src/calibre/__init__.py
+++ b/src/calibre/__init__.py
@@ -217,14 +217,25 @@ def filename_to_utf8(name):
     return name.decode(codec, 'replace').encode('utf8')
 
 def extract(path, dir):
-    ext = os.path.splitext(path)[1][1:].lower()
     extractor = None
-    if ext in ['zip', 'cbz', 'epub', 'oebzip']:
-        from calibre.libunzip import extract as zipextract
-        extractor = zipextract
-    elif ext in ['cbr', 'rar']:
+    # First use the file header to identify its type
+    with open(path, 'rb') as f:
+        id_ = f.read(3)
+    if id_ == b'Rar':
         from calibre.libunrar import extract as rarextract
         extractor = rarextract
+    elif id_.startswith(b'PK'):
+        from calibre.libunzip import extract as zipextract
+        extractor = zipextract
+    if extractor is None:
+        # Fallback to file extension
+        ext = os.path.splitext(path)[1][1:].lower()
+        if ext in ['zip', 'cbz', 'epub', 'oebzip']:
+            from calibre.libunzip import extract as zipextract
+            extractor = zipextract
+        elif ext in ['cbr', 'rar']:
+            from calibre.libunrar import extract as rarextract
+            extractor = rarextract
     if extractor is None:
         raise Exception('Unknown archive type')
     extractor(path, dir)
diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 1e40a8e5ff..91abfacc95 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -166,6 +166,14 @@ class ComicMetadataReader(MetadataReaderPlugin):
     description = _('Extract cover from comic files')
 
     def get_metadata(self, stream, ftype):
+        if hasattr(stream, 'seek') and hasattr(stream, 'tell'):
+            pos = stream.tell()
+            id_ = stream.read(3)
+            stream.seek(pos)
+            if id_ == b'Rar':
+                ftype = 'cbr'
+            elif id_.startswith(b'PK'):  # header bytes, not the id() builtin
+                ftype = 'cbz'
         if ftype == 'cbr':
             from calibre.libunrar import extract_first_alphabetically as extract_first
             extract_first

From ddf6bd19f557a8f546422da0c10c667dd623fc18 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 10:50:55 -0600
Subject: [PATCH 44/57] Add a 'plugin tweak' test_eight_code which if set to
 True will cause calibre to use code intended for the 0.8.x series. Note that
 this code is in heavy development so only set this tweak if you are OK with
 having parts of calibre broken.

---
 src/calibre/customize/builtins.py | 26 ++++++++++++++++----------
 src/calibre/utils/config.py       |  1 +
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 91abfacc95..93cdfe50d9 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -10,6 +10,7 @@ from calibre.constants import numeric_version
 from calibre.ebooks.metadata.archive import ArchiveExtract, get_cbz_metadata
 from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.oeb.base import OEB_IMAGES
+from calibre.utils.config import test_eight_code
 
 # To archive plugins {{{
 class HTML2ZIP(FileTypePlugin):
@@ -612,20 +613,25 @@ from calibre.devices.folder_device.driver import FOLDER_DEVICE_FOR_CONFIG
 from calibre.devices.kobo.driver import KOBO
 from calibre.devices.bambook.driver import BAMBOOK
 
-from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
-    KentDistrictLibrary
-from calibre.ebooks.metadata.douban import DoubanBooks
-from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
-from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
-        AmazonCovers, DoubanCovers
 from calibre.library.catalog import CSV_XML, EPUB_MOBI, BIBTEX
 from calibre.ebooks.epub.fix.unmanifested import Unmanifested
 from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 
-plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, GoogleBooks, ISBNDB, Amazon,
-        KentDistrictLibrary, DoubanBooks, NiceBooks, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
-        Epubcheck, OpenLibraryCovers, AmazonCovers, DoubanCovers,
-        NiceBooksCovers]
+plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
+        Epubcheck, ]
+
+if not test_eight_code:
+    from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
+        KentDistrictLibrary
+    from calibre.ebooks.metadata.douban import DoubanBooks
+    from calibre.ebooks.metadata.nicebooks import NiceBooks, NiceBooksCovers
+    from calibre.ebooks.metadata.covers import OpenLibraryCovers, \
+            AmazonCovers, DoubanCovers
+
+    plugins += [GoogleBooks, ISBNDB, Amazon,
+        OpenLibraryCovers, AmazonCovers, DoubanCovers,
+        NiceBooksCovers, KentDistrictLibrary, DoubanBooks, NiceBooks]
+
 plugins += [
     ComicInput,
     EPUBInput,
diff --git a/src/calibre/utils/config.py b/src/calibre/utils/config.py
index d5a489acf1..66316d051b 100644
--- a/src/calibre/utils/config.py
+++ b/src/calibre/utils/config.py
@@ -784,6 +784,7 @@ def write_tweaks(raw):
 
 
 tweaks = read_tweaks()
+test_eight_code = tweaks.get('test_eight_code', False)
 
 def migrate():
     if hasattr(os, 'geteuid') and os.geteuid() == 0:

From 086a2959173f56fc27d9d55008dc66c4cba8d0bb Mon Sep 17 00:00:00 2001
From: Charles Haley <>
Date: Wed, 6 Apr 2011 17:56:54 +0100
Subject: [PATCH 45/57] Fix #751950: make content server ignore non-existent
 search restrictions.

---
 src/calibre/library/server/base.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/calibre/library/server/base.py b/src/calibre/library/server/base.py
index dba6abbfa5..eea28469a9 100644
--- a/src/calibre/library/server/base.py
+++ b/src/calibre/library/server/base.py
@@ -24,6 +24,8 @@ from calibre.library.server.xml import XMLServer
 from calibre.library.server.opds import OPDSServer
 from calibre.library.server.cache import Cache
 from calibre.library.server.browse import BrowseServer
+from calibre.utils.search_query_parser import saved_searches
+from calibre import prints
 
 
 class DispatchController(object): # {{{
@@ -178,7 +180,12 @@ class LibraryServer(ContentServer, MobileServer, XMLServer, OPDSServer, Cache,
     def set_search_restriction(self, restriction):
         self.search_restriction_name = restriction
         if restriction:
-            self.search_restriction = 'search:"%s"'%restriction
+            if restriction not in saved_searches().names():
+                prints('WARNING: Content server: search restriction ',
+                       restriction, ' does not exist')
+                self.search_restriction = ''
+            else:
+                self.search_restriction = 'search:"%s"'%restriction
         else:
             self.search_restriction = ''
         self.reset_caches()

From 261df5b15d1bb9636f2adf6fb982708fb3c35f91 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 11:09:50 -0600
Subject: [PATCH 46/57] Use test_eight_code in fetch-ebook-metadata

---
 src/calibre/customize/builtins.py          | 18 ++++++++++--------
 src/calibre/ebooks/metadata/sources/cli.py |  9 ++++++++-
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/src/calibre/customize/builtins.py b/src/calibre/customize/builtins.py
index 93cdfe50d9..298799daa5 100644
--- a/src/calibre/customize/builtins.py
+++ b/src/calibre/customize/builtins.py
@@ -620,7 +620,16 @@ from calibre.ebooks.epub.fix.epubcheck import Epubcheck
 plugins = [HTML2ZIP, PML2PMLZ, TXT2TXTZ, ArchiveExtract, CSV_XML, EPUB_MOBI, BIBTEX, Unmanifested,
         Epubcheck, ]
 
-if not test_eight_code:
+if test_eight_code:
+# New metadata download plugins {{{
+    from calibre.ebooks.metadata.sources.google import GoogleBooks
+    from calibre.ebooks.metadata.sources.amazon import Amazon
+    from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
+
+    plugins += [GoogleBooks, Amazon, OpenLibrary]
+
+# }}}
+else:
     from calibre.ebooks.metadata.fetch import GoogleBooks, ISBNDB, Amazon, \
         KentDistrictLibrary
     from calibre.ebooks.metadata.douban import DoubanBooks
@@ -1069,11 +1078,4 @@ plugins += [LookAndFeel, Behavior, Columns, Toolbar, Search, InputOptions,
 
 #}}}
 
-# New metadata download plugins {{{
-from calibre.ebooks.metadata.sources.google import GoogleBooks
-from calibre.ebooks.metadata.sources.amazon import Amazon
-from calibre.ebooks.metadata.sources.openlibrary import OpenLibrary
 
-plugins += [GoogleBooks, Amazon, OpenLibrary]
-
-# }}}
diff --git a/src/calibre/ebooks/metadata/sources/cli.py b/src/calibre/ebooks/metadata/sources/cli.py
index 58042da2bf..cb422f939d 100644
--- a/src/calibre/ebooks/metadata/sources/cli.py
+++ b/src/calibre/ebooks/metadata/sources/cli.py
@@ -19,9 +19,13 @@ from calibre.ebooks.metadata.opf2 import metadata_to_opf
 from calibre.ebooks.metadata.sources.base import create_log
 from calibre.ebooks.metadata.sources.identify import identify
 from calibre.ebooks.metadata.sources.covers import download_cover
-
+from calibre.utils.config import test_eight_code
 
 def option_parser():
+    if not test_eight_code:
+        from calibre.ebooks.metadata.fetch import option_parser
+        return option_parser()
+
     parser = OptionParser(textwrap.dedent(
         '''\
         %prog [options]
@@ -44,6 +48,9 @@ def option_parser():
     return parser
 
 def main(args=sys.argv):
+    if not test_eight_code:
+        from calibre.ebooks.metadata.fetch import main
+        return main(args)
     parser = option_parser()
     opts, args = parser.parse_args(args)
 

From d63d47a9f53ffa73017351802c454cd10d010062 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 12:28:07 -0600
Subject: [PATCH 47/57] Use new edit metadata dialog when test_eight_code is
 True

---
 src/calibre/gui2/actions/edit_metadata.py  | 50 +++++++++++++++++-----
 src/calibre/gui2/metadata/basic_widgets.py |  5 +--
 src/calibre/gui2/metadata/single.py        | 22 ++++++++--
 3 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/src/calibre/gui2/actions/edit_metadata.py b/src/calibre/gui2/actions/edit_metadata.py
index 3f053e5223..c3ceb27e7e 100644
--- a/src/calibre/gui2/actions/edit_metadata.py
+++ b/src/calibre/gui2/actions/edit_metadata.py
@@ -17,6 +17,7 @@ from calibre.gui2.dialogs.confirm_delete import confirm
 from calibre.gui2.dialogs.tag_list_editor import TagListEditor
 from calibre.gui2.actions import InterfaceAction
 from calibre.utils.icu import sort_key
+from calibre.utils.config import test_eight_code
 
 class EditMetadataAction(InterfaceAction):
 
@@ -133,8 +134,6 @@ class EditMetadataAction(InterfaceAction):
 
         row_list = [r.row() for r in rows]
         current_row = 0
-        changed = set([])
-        db = self.gui.library_view.model().db
 
         if len(row_list) == 1:
             cr = row_list[0]
@@ -142,6 +141,24 @@ class EditMetadataAction(InterfaceAction):
                 list(range(self.gui.library_view.model().rowCount(QModelIndex())))
             current_row = row_list.index(cr)
 
+        if test_eight_code:
+            changed = self.do_edit_metadata(row_list, current_row)
+        else:
+            changed = self.do_edit_metadata_old(row_list, current_row)
+
+        if changed:
+            self.gui.library_view.model().refresh_ids(list(changed))
+            current = self.gui.library_view.currentIndex()
+            m = self.gui.library_view.model()
+            if self.gui.cover_flow:
+                self.gui.cover_flow.dataChanged()
+            m.current_changed(current, previous)
+            self.gui.tags_view.recount()
+
+    def do_edit_metadata_old(self, row_list, current_row):
+        changed = set([])
+        db = self.gui.library_view.model().db
+
         while True:
             prev = next_ = None
             if current_row > 0:
@@ -167,15 +184,28 @@ class EditMetadataAction(InterfaceAction):
             self.gui.library_view.set_current_row(current_row)
             self.gui.library_view.scroll_to_row(current_row)
 
+    def do_edit_metadata(self, row_list, current_row):
+        from calibre.gui2.metadata.single import edit_metadata
+        db = self.gui.library_view.model().db
+        changed, rows_to_refresh = edit_metadata(db, row_list, current_row,
+                parent=self.gui, view_slot=self.view_format_callback,
+                set_current_callback=self.set_current_callback)
+        return changed
+
+    def set_current_callback(self, id_):
+        db = self.gui.library_view.model().db
+        current_row = db.row(id_)
+        self.gui.library_view.set_current_row(current_row)
+        self.gui.library_view.scroll_to_row(current_row)
+
+    def view_format_callback(self, id_, fmt):
+        view = self.gui.iactions['View']
+        if id_ is None:
+            view._view_file(fmt)
+        else:
+            db = self.gui.library_view.model().db
+            view.view_format(db.row(id_), fmt)
 
-        if changed:
-            self.gui.library_view.model().refresh_ids(list(changed))
-            current = self.gui.library_view.currentIndex()
-            m = self.gui.library_view.model()
-            if self.gui.cover_flow:
-                self.gui.cover_flow.dataChanged()
-            m.current_changed(current, previous)
-            self.gui.tags_view.recount()
 
     def edit_bulk_metadata(self, checked):
         '''
diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py
index bab9073588..0b7d96c07c 100644
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@@ -428,7 +428,7 @@ class Format(QListWidgetItem): # {{{
         if timestamp is not None:
             ts = timestamp.astimezone(local_tz)
             t = strftime('%a, %d %b %Y [%H:%M:%S]', ts.timetuple())
-            text = _('Last modified: %s')%t
+            text = _('Last modified: %s\n\nDouble click to view')%t
             self.setToolTip(text)
             self.setStatusTip(text)
 
@@ -577,8 +577,7 @@ class FormatsManager(QWidget): # {{{
             self.changed = True
 
     def show_format(self, item, *args):
-        fmt = item.ext
-        self.dialog.view_format.emit(fmt)
+        self.dialog.do_view_format.emit(item.path, item.ext)
 
     def get_selected_format_metadata(self, db, id_):
         old = prefs['read_file_metadata']
diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py
index 70307eb3b1..bba8528573 100644
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@@ -26,7 +26,7 @@ from calibre.utils.config import tweaks
 
 class MetadataSingleDialogBase(ResizableDialog):
 
-    view_format = pyqtSignal(object)
+    view_format = pyqtSignal(object, object)
     cc_two_column = tweaks['metadata_single_use_2_cols_for_custom_fields']
     one_line_comments_toolbar = False
 
@@ -194,6 +194,13 @@ class MetadataSingleDialogBase(ResizableDialog):
                 pass # Do something
     # }}}
 
+    def do_view_format(self, path, fmt):
+        if path:
+            self.view_format.emit(None, path)
+        else:
+            self.view_format.emit(self.book_id, fmt)
+
+
     def do_layout(self):
         raise NotImplementedError()
 
@@ -204,6 +211,8 @@ class MetadataSingleDialogBase(ResizableDialog):
             widget.initialize(self.db, id_)
         for widget in getattr(self, 'custom_metadata_widgets', []):
             widget.initialize(id_)
+        if callable(self.set_current_callback):
+            self.set_current_callback(id_)
         # Commented out as it doesn't play nice with Next, Prev buttons
         #self.fetch_metadata_button.setFocus(Qt.OtherFocusReason)
 
@@ -339,11 +348,13 @@ class MetadataSingleDialogBase(ResizableDialog):
         gprefs['metasingle_window_geometry3'] = bytearray(self.saveGeometry())
 
     # Dialog use methods {{{
-    def start(self, row_list, current_row, view_slot=None):
+    def start(self, row_list, current_row, view_slot=None,
+            set_current_callback=None):
         self.row_list = row_list
         self.current_row = current_row
         if view_slot is not None:
             self.view_format.connect(view_slot)
+        self.set_current_callback = set_current_callback
         self.do_one(apply_changes=False)
         ret = self.exec_()
         self.break_cycles()
@@ -375,6 +386,7 @@ class MetadataSingleDialogBase(ResizableDialog):
     def break_cycles(self):
         # Break any reference cycles that could prevent python
         # from garbage collecting this dialog
+        self.set_current_callback = None
         def disconnect(signal):
             try:
                 signal.disconnect()
@@ -643,9 +655,11 @@ class MetadataSingleDialogAlt(MetadataSingleDialogBase): # {{{
 # }}}
 
 
-def edit_metadata(db, row_list, current_row, parent=None, view_slot=None):
+def edit_metadata(db, row_list, current_row, parent=None, view_slot=None,
+        set_current_callback=None):
     d = MetadataSingleDialog(db, parent)
-    d.start(row_list, current_row, view_slot=view_slot)
+    d.start(row_list, current_row, view_slot=view_slot,
+            set_current_callback=set_current_callback)
     return d.changed, d.rows_to_refresh
 
 if __name__ == '__main__':

From 901960ec044b8689a5cdf9062a69cdeae8306940 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 12:29:29 -0600
Subject: [PATCH 48/57] ...

---
 src/calibre/gui2/metadata/basic_widgets.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/gui2/metadata/basic_widgets.py b/src/calibre/gui2/metadata/basic_widgets.py
index 0b7d96c07c..b0b7115ca1 100644
--- a/src/calibre/gui2/metadata/basic_widgets.py
+++ b/src/calibre/gui2/metadata/basic_widgets.py
@@ -577,7 +577,7 @@ class FormatsManager(QWidget): # {{{
             self.changed = True
 
     def show_format(self, item, *args):
-        self.dialog.do_view_format.emit(item.path, item.ext)
+        self.dialog.do_view_format(item.path, item.ext)
 
     def get_selected_format_metadata(self, db, id_):
         old = prefs['read_file_metadata']

From 67a467107ea387042880d2257c8c61a063b80b4f Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 12:35:01 -0600
Subject: [PATCH 49/57] ...

---
 src/calibre/gui2/metadata/single.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/calibre/gui2/metadata/single.py b/src/calibre/gui2/metadata/single.py
index bba8528573..4f66e0d2ba 100644
--- a/src/calibre/gui2/metadata/single.py
+++ b/src/calibre/gui2/metadata/single.py
@@ -32,9 +32,9 @@ class MetadataSingleDialogBase(ResizableDialog):
 
     def __init__(self, db, parent=None):
         self.db = db
-        self.changed = set([])
-        self.books_to_refresh = set([])
-        self.rows_to_refresh = set([])
+        self.changed = set()
+        self.books_to_refresh = set()
+        self.rows_to_refresh = set()
         ResizableDialog.__init__(self, parent)
 
     def setupUi(self, *args): # {{{
@@ -386,7 +386,7 @@ class MetadataSingleDialogBase(ResizableDialog):
     def break_cycles(self):
         # Break any reference cycles that could prevent python
         # from garbage collecting this dialog
-        self.set_current_callback = None
+        self.set_current_callback = self.db = None
         def disconnect(signal):
             try:
                 signal.disconnect()

From 2e08bc51712079312a96c18d0dbb0481bed56bc0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 12:41:10 -0600
Subject: [PATCH 50/57] Fix #752464 ("Kommersant" recipe is broken)

---
 recipes/kommersant.recipe | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/recipes/kommersant.recipe b/recipes/kommersant.recipe
index f24a5da909..09fb8f8ad8 100644
--- a/recipes/kommersant.recipe
+++ b/recipes/kommersant.recipe
@@ -1,5 +1,5 @@
 __license__   = 'GPL v3'
-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
+__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>'
 '''
 www.kommersant.ru
 '''
@@ -20,7 +20,13 @@ class Kommersant_ru(BasicNewsRecipe):
     language              = 'ru'
     publication_type      = 'newspaper'
     masthead_url          = 'http://www.kommersant.ru/CorpPics/logo_daily_1.gif'
-    extra_css             = ' @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)} body{font-family: Arial, sans1, sans-serif} span#ctl00_ContentPlaceHolderStyle_LabelSubTitle{margin-bottom: 1em; display: block} .author{margin-bottom: 1em; display: block} .paragraph{margin-bottom: 1em; display: block} .vvodka{font-weight: bold; margin-bottom: 1em} '
+    extra_css             = """ 
+	                          @font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
+	                          body{font-family: Tahoma, Arial, Helvetica, sans1, sans-serif}
+							  .title{font-size: x-large; font-weight: bold; margin-bottom: 1em}
+							  .subtitle{font-size: large; margin-bottom: 1em}
+							  .document_vvodka{font-weight: bold; margin-bottom: 1em}
+							"""
 
     conversion_options = {
                           'comment'          : description
@@ -29,14 +35,11 @@ class Kommersant_ru(BasicNewsRecipe):
                         , 'language'         : language
                         }
 
-    keep_only_tags = [
-                         dict(attrs={'id':'ctl00_ContentPlaceHolderStyle_PanelHeader'})
-                        ,dict(attrs={'class':['vvodka','paragraph','author']})
-                     ]
-    remove_tags        = [dict(name=['iframe','object','link','img','base'])]
+    keep_only_tags = [dict(attrs={'class':['document','document_vvodka','document_text','document_authors vblock']})]
+    remove_tags    = [dict(name=['iframe','object','link','img','base','meta'])]
 
     feeds       = [(u'Articles', u'http://feeds.kommersant.ru/RSS_Export/RU/daily.xml')]
 
-    def print_version(self, url):
-        return url.replace('doc-rss.aspx','doc.aspx') + '&print=true'
-
+    def print_version(self, url):	    
+        return url.replace('/doc-rss/','/Doc/') + '/Print'
+		
\ No newline at end of file

From cad3b71b324ffb280b90c309bfdbe7ea376a1430 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 13:54:10 -0600
Subject: [PATCH 51/57] ...

---
 src/calibre/manual/gui.rst   | 3 ++-
 src/calibre/manual/index.rst | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/calibre/manual/gui.rst b/src/calibre/manual/gui.rst
index 9307ff30f6..7b6e60c93a 100644
--- a/src/calibre/manual/gui.rst
+++ b/src/calibre/manual/gui.rst
@@ -71,7 +71,7 @@ Edit metadata
 
 |emii| The :guilabel:`Edit metadata` action has six variations, which can be accessed by clicking the down arrow on the right side of the button.
 
-    1. **Edit metadata individually**: This allows you to edit the metadata of books one-by-one, with the option of fetching metadata, including covers from the internet. It also allows you to add/remove particular ebook formats from a book. For more detail see :ref:`metadata`.
+    1. **Edit metadata individually**: This allows you to edit the metadata of books one-by-one, with the option of fetching metadata, including covers from the internet. It also allows you to add/remove particular ebook formats from a book. 
     2. **Edit metadata in bulk**: This allows you to edit common metadata fields for large numbers of books simulataneously. It operates on all the books you have selected in the :ref:`Library view <search_sort>`.
     3. **Download metadata and covers**: Downloads metadata and covers (if available), for the books that are selected in the book list.
     4. **Download only metadata**: Downloads only metadata (if available), for the books that are selected in the book list.
@@ -79,6 +79,7 @@ Edit metadata
     6. **Download only social metadata**: Downloads only social metadata such as tags and reviews (if available), for the books that are selected in the book list.
     7. **Merge Book Records**: Gives you the capability of merging the metadata and formats of two or more book records together. You can choose to either delete or keep the records that were not clicked first.
 
+For more details see :ref:`metadata`.
 
 .. _convert_ebooks:
 
diff --git a/src/calibre/manual/index.rst b/src/calibre/manual/index.rst
index 996a1de382..e54882dda0 100644
--- a/src/calibre/manual/index.rst
+++ b/src/calibre/manual/index.rst
@@ -70,7 +70,7 @@ Customizing |app|'s e-book conversion
 .. toctree::
    :maxdepth: 2
 
-   viewer
+   conversion
 
 Editing e-book metadata
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -78,7 +78,7 @@ Editing e-book metadata
 .. toctree::
    :maxdepth: 2
 
-   viewer
+   metadata
 
 Frequently Asked Questions
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^

From 3e230ac838eab493f7125534fb024f1f01eaefb9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 15:46:51 -0600
Subject: [PATCH 52/57] ...

---
 src/calibre/manual/conf.py | 2 +-
 src/calibre/manual/faq.rst | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/manual/conf.py b/src/calibre/manual/conf.py
index fc8962bcfd..d2b3a91d8d 100644
--- a/src/calibre/manual/conf.py
+++ b/src/calibre/manual/conf.py
@@ -126,7 +126,7 @@ html_use_modindex = False
 html_use_index = False
 
 # If true, the reST sources are included in the HTML build as _sources/<name>.
-html_copy_source = False
+html_copy_source = True
 
 # Output file base name for HTML help builder.
 htmlhelp_basename = 'calibredoc'
diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index f48fa9dc16..f8b257fd75 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -100,7 +100,7 @@ We just need some information from you:
 
 Once you send us the output for a particular operating system, support for the device in that operating system
 will appear in the next release of |app|. To send us the output, open a bug report and attach the output to it.
-See `http://calibre-ebook.com/bugs`_.
+See `calibre bugs <http://calibre-ebook.com/bugs>`_.
 
 My device is not being detected by |app|?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

From 58899e65ef4e532976f59fb6da1c1484a9a5ad4d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 16:23:18 -0600
Subject: [PATCH 53/57] ...

---
 src/calibre/gui2/metadata/single_download.py | 39 ++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 src/calibre/gui2/metadata/single_download.py

diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py
new file mode 100644
index 0000000000..ace4133d7a
--- /dev/null
+++ b/src/calibre/gui2/metadata/single_download.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python
+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+
+__license__   = 'GPL v3'
+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
+__docformat__ = 'restructuredtext en'
+
+from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF,
+        QStyle, QApplication)
+
+class RichTextDelegate(QStyledItemDelegate): # {{{
+
+    def __init__(self, parent=None):
+        QStyledItemDelegate.__init__(self, parent)
+
+    def to_doc(self, index):
+        doc = QTextDocument()
+        doc.setHtml(index.data().toString())
+        return doc
+
+    def sizeHint(self, option, index):
+        ans = self.to_doc(index).size().toSize()
+        ans.setHeight(ans.height()+10)
+        return ans
+
+    def paint(self, painter, option, index):
+        painter.save()
+        painter.setClipRect(QRectF(option.rect))
+        if hasattr(QStyle, 'CE_ItemViewItem'):
+            QApplication.style().drawControl(QStyle.CE_ItemViewItem, option, painter)
+        elif option.state & QStyle.State_Selected:
+            painter.fillRect(option.rect, option.palette.highlight())
+        painter.translate(option.rect.topLeft())
+        self.to_doc(index).drawContents(painter)
+        painter.restore()
+# }}}
+

From 535622519776599f3ded7b62dfb9c16a6b4acf8d Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 21:51:03 -0600
Subject: [PATCH 54/57] Start work on new metadata download GUIs

---
 src/calibre/ebooks/metadata/sources/amazon.py |   2 +-
 src/calibre/ebooks/metadata/sources/base.py   |   7 +
 src/calibre/ebooks/metadata/sources/covers.py |   2 +-
 .../ebooks/metadata/sources/identify.py       |   2 +-
 src/calibre/ebooks/metadata/sources/isbndb.py |   3 +
 src/calibre/gui2/metadata/single_download.py  | 154 +++++++++++++++++-
 src/calibre/manual/server.rst                 |   4 +-
 7 files changed, 167 insertions(+), 7 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index d48f502c29..b070132de9 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -279,7 +279,7 @@ class Worker(Thread): # Get details {{{
 
 class Amazon(Source):
 
-    name = 'Amazon Metadata'
+    name = 'Amazon Store'
     description = _('Downloads metadata from Amazon')
 
     capabilities = frozenset(['identify', 'cover'])
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index faa7420081..d4e090084c 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -167,6 +167,13 @@ class Source(Plugin):
 
     # Configuration {{{
 
+    def is_configured(self):
+        '''
+        Return False if your plugin needs to be configured before it can be
+        used. For example, it might need a username/password/API key.
+        '''
+        return True
+
     @property
     def prefs(self):
         if self._config_obj is None:
diff --git a/src/calibre/ebooks/metadata/sources/covers.py b/src/calibre/ebooks/metadata/sources/covers.py
index 46b278397c..cf6ec90c54 100644
--- a/src/calibre/ebooks/metadata/sources/covers.py
+++ b/src/calibre/ebooks/metadata/sources/covers.py
@@ -76,7 +76,7 @@ def run_download(log, results, abort,
         (plugin, width, height, fmt, bytes)
 
     '''
-    plugins = list(metadata_plugins(['cover']))
+    plugins = [p for p in metadata_plugins(['cover']) if p.is_configured()]
 
     rq = Queue()
     workers = [Worker(p, abort, title, authors, identifiers, timeout, rq) for p
diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py
index cbc12b6167..8c6172f0e2 100644
--- a/src/calibre/ebooks/metadata/sources/identify.py
+++ b/src/calibre/ebooks/metadata/sources/identify.py
@@ -250,7 +250,7 @@ def merge_identify_results(result_map, log):
 def identify(log, abort, # {{{
         title=None, authors=None, identifiers={}, timeout=30):
     start_time = time.time()
-    plugins = list(metadata_plugins(['identify']))
+    plugins = [p for p in metadata_plugins(['identify']) if p.is_configured()]
 
     kwargs = {
             'title': title,
diff --git a/src/calibre/ebooks/metadata/sources/isbndb.py b/src/calibre/ebooks/metadata/sources/isbndb.py
index 3cd9d96c81..ab9342c6cb 100644
--- a/src/calibre/ebooks/metadata/sources/isbndb.py
+++ b/src/calibre/ebooks/metadata/sources/isbndb.py
@@ -37,4 +37,7 @@ class ISBNDB(Source):
 
         self.isbndb_key = prefs['isbndb_key']
 
+    def is_configured(self):
+        return self.isbndb_key is not None
+
 
diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py
index ace4133d7a..be521b6000 100644
--- a/src/calibre/gui2/metadata/single_download.py
+++ b/src/calibre/gui2/metadata/single_download.py
@@ -7,8 +7,13 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF,
-        QStyle, QApplication)
+from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt,
+        QStyle, QApplication, QDialog, QVBoxLayout, QLabel, QDialogButtonBox,
+        QStackedWidget, QWidget, QTableView, QGridLayout, QFontInfo, QPalette)
+from PyQt4.QtWebKit import QWebView
+
+from calibre.customize.ui import metadata_plugins
+from calibre.ebooks.metadata import authors_to_string
 
 class RichTextDelegate(QStyledItemDelegate): # {{{
 
@@ -37,3 +42,148 @@ class RichTextDelegate(QStyledItemDelegate): # {{{
         painter.restore()
 # }}}
 
+class ResultsView(QTableView):
+
+    def __init__(self, parent=None):
+        QTableView.__init__(self, parent)
+
+class Comments(QWebView): # {{{
+
+    def __init__(self, parent=None):
+        QWebView.__init__(self, parent)
+        self.setAcceptDrops(False)
+        self.setMaximumWidth(270)
+        self.setMinimumWidth(270)
+
+        palette = self.palette()
+        palette.setBrush(QPalette.Base, Qt.transparent)
+        self.page().setPalette(palette)
+        self.setAttribute(Qt.WA_OpaquePaintEvent, False)
+
+    def turnoff_scrollbar(self, *args):
+        self.page().mainFrame().setScrollBarPolicy(Qt.Horizontal, Qt.ScrollBarAlwaysOff)
+
+    def show_data(self, html):
+        def color_to_string(col):
+            ans = '#000000'
+            if col.isValid():
+                col = col.toRgb()
+                if col.isValid():
+                    ans = unicode(col.name())
+            return ans
+
+        f = QFontInfo(QApplication.font(self.parent())).pixelSize()
+        c = color_to_string(QApplication.palette().color(QPalette.Normal,
+                        QPalette.WindowText))
+        templ = '''\
+        <html>
+            <head>
+            <style type="text/css">
+                body, td {background-color: transparent; font-size: %dpx; color: %s }
+                a { text-decoration: none; color: blue }
+                div.description { margin-top: 0; padding-top: 0; text-indent: 0 }
+                table { margin-bottom: 0; padding-bottom: 0; }
+            </style>
+            </head>
+            <body>
+            <div class="description">
+            %%s
+            </div>
+            </body>
+        <html>
+        '''%(f, c)
+        self.setHtml(templ%html)
+# }}}
+
+class IdentifyWidget(QWidget):
+
+    def __init__(self, parent=None):
+        QWidget.__init__(self, parent)
+
+        self.l = l = QGridLayout()
+        self.setLayout(l)
+
+        names = ['<b>'+p.name+'</b>' for p in metadata_plugins(['identify']) if
+                p.is_configured()]
+        self.top = QLabel('<p>'+_('calibre is downloading metadata from: ') +
+            ', '.join(names))
+        self.top.setWordWrap(True)
+        l.addWidget(self.top, 0, 0)
+
+        self.results_view = ResultsView(self)
+        l.addWidget(self.results_view, 1, 0)
+
+        self.comments_view = Comments(self)
+        l.addWidget(self.comments_view, 1, 1)
+
+        self.query = QLabel('download starting...')
+        f = self.query.font()
+        f.setPointSize(f.pointSize()-2)
+        self.query.setFont(f)
+        self.query.setWordWrap(True)
+        l.addWidget(self.query, 2, 0, 1, 2)
+
+    def start(self, title=None, authors=None, identifiers={}):
+        parts = []
+        if title:
+            parts.append('title:'+title)
+        if authors:
+            parts.append('authors:'+authors_to_string(authors))
+        if identifiers:
+            x = ', '.join('%s:%s'%(k, v) for k, v in identifiers)
+            parts.append(x)
+        self.query.setText(_('Query: ')+'; '.join(parts))
+        self.comments_view.show_data('<h2>'+_('Downloading, please wait')+
+                '<span id="dots">.</span></h2>'+
+                '''
+                <script type="text/javascript">
+                window.onload=function(){
+                    var dotspan = document.getElementById('dots');
+                    window.setInterval(function(){
+                        if(dotspan.textContent == '...'){
+                        dotspan.textContent = '.';
+                        }
+                        else{
+                        dotspan.textContent += '.';
+                        }
+                    }, 500);
+                }
+                </script>
+                ''')
+
+class FullFetch(QDialog): # {{{
+
+    def __init__(self, parent=None):
+        QDialog.__init__(self, parent)
+
+        self.setWindowTitle(_('Downloading metadata...'))
+        self.setWindowIcon(QIcon(I('metadata.png')))
+
+        self.stack = QStackedWidget()
+        self.l = l = QVBoxLayout()
+        self.setLayout(l)
+        l.addWidget(self.stack)
+
+        self.bb = QDialogButtonBox(QDialogButtonBox.Cancel)
+        l.addWidget(self.bb)
+        self.bb.rejected.connect(self.reject)
+
+        self.identify_widget = IdentifyWidget(self)
+        self.stack.addWidget(self.identify_widget)
+        self.resize(850, 500)
+
+    def accept(self):
+        # Prevent pressing Enter from closing the dialog
+        pass
+
+    def start(self, title=None, authors=None, identifiers={}):
+        self.identify_widget.start(title=title, authors=authors,
+                identifiers=identifiers)
+        self.exec_()
+# }}}
+
+if __name__ == '__main__':
+    app = QApplication([])
+    d = FullFetch()
+    d.start(title='great gatsby', authors=['Fitzgerald'])
+
diff --git a/src/calibre/manual/server.rst b/src/calibre/manual/server.rst
index 6d1adc88cd..82ec5c2927 100644
--- a/src/calibre/manual/server.rst
+++ b/src/calibre/manual/server.rst
@@ -16,7 +16,7 @@ Here, we will show you how to integrate the |app| content server into another se
 Using a reverse proxy
 -----------------------
 
-This is the simplest approach as it allows you to use the binary calibre install with no external dependencies/system integration requirements.
+A reverse proxy is when your normal server accepts incoming requests and passes them on to the calibre server. It then reads the response from the calibre server and forwards it to the client. This means that you can simply run the calibre server as normal without trying to integrate it closely with your main server, and you can take advantage of whatever authentication systems your main server has in place. This is the simplest approach as it allows you to use the binary calibre install with no external dependencies/system integration requirements. Below is an example of how to achieve this with Apache as your main server, but it will work with any server that supports reverse proxies.
 
 First start the |app| content server as shown below::
 
@@ -33,7 +33,7 @@ The exact technique for enabling the proxy modules will vary depending on your A
     RewriteRule ^/calibre/(.*) http://localhost:8080/calibre/$1 [proxy]
     RewriteRule ^/calibre http://localhost:8080 [proxy]
 
-That's all, you will now be able to access the |app| Content Server under the /calibre URL in your apache server.
+That's all, you will now be able to access the |app| Content Server under the /calibre URL in your Apache server. The above rules pass all requests under /calibre to the calibre server running on port 8080 and, thanks to the --url-prefix option above, the calibre server handles them transparently.
 
 .. note:: If you are willing to devote an entire VirtualHost to the content server, then there is no need to use --url-prefix and RewriteRule, instead just use the ProxyPass directive.
 

From fc1e9175fcb40d95c701f4b2d8a3c1025c4c2aad Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 22:00:53 -0600
Subject: [PATCH 55/57] Fix some server settings not being applied when
 clicking start server in Preferences->Sharing over the net

---
 src/calibre/gui2/preferences/server.py | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/src/calibre/gui2/preferences/server.py b/src/calibre/gui2/preferences/server.py
index 82519f17cd..421dbe737f 100644
--- a/src/calibre/gui2/preferences/server.py
+++ b/src/calibre/gui2/preferences/server.py
@@ -57,17 +57,8 @@ class ConfigWidget(ConfigWidgetBase, Ui_Form):
 
         r('autolaunch_server', config)
 
-    def set_server_options(self):
-        c = self.proxy
-        c.set('port', self.opt_port.value())
-        c.set('username', unicode(self.opt_username.text()).strip())
-        p = unicode(self.opt_password.text()).strip()
-        if not p:
-            p = None
-        c.set('password', p)
-
     def start_server(self):
-        self.set_server_options()
+        ConfigWidgetBase.commit(self)
         self.gui.start_content_server(check_started=False)
         while not self.gui.content_server.is_running and self.gui.content_server.exception is None:
             time.sleep(1)

From 2befe1eb584186f7cff24088e7a1a0edd2ee3b74 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 22:53:08 -0600
Subject: [PATCH 56/57] ...

---
 src/calibre/gui2/metadata/single_download.py | 91 +++++++++++++++-----
 src/calibre/utils/logging.py                 | 54 ++++++++++--
 2 files changed, 116 insertions(+), 29 deletions(-)

diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py
index be521b6000..426d0b9e78 100644
--- a/src/calibre/gui2/metadata/single_download.py
+++ b/src/calibre/gui2/metadata/single_download.py
@@ -7,6 +7,8 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
+from threading import Thread, Event
+
 from PyQt4.Qt import (QStyledItemDelegate, QTextDocument, QRectF, QIcon, Qt,
         QStyle, QApplication, QDialog, QVBoxLayout, QLabel, QDialogButtonBox,
         QStackedWidget, QWidget, QTableView, QGridLayout, QFontInfo, QPalette)
@@ -14,6 +16,18 @@ from PyQt4.QtWebKit import QWebView
 
 from calibre.customize.ui import metadata_plugins
 from calibre.ebooks.metadata import authors_to_string
+from calibre.utils.logging import ThreadSafeLog, UnicodeHTMLStream
+from calibre.ebooks.metadata.sources.identify import identify
+
+class Log(ThreadSafeLog): # {{{
+
+    def __init__(self):
+        ThreadSafeLog.__init__(self, level=self.DEBUG)
+        self.outputs = [UnicodeHTMLStream()]
+
+    def clear(self):
+        self.outputs[0].clear()
+# }}}
 
 class RichTextDelegate(QStyledItemDelegate): # {{{
 
@@ -95,10 +109,35 @@ class Comments(QWebView): # {{{
         self.setHtml(templ%html)
 # }}}
 
+class IdentifyWorker(Thread):
+
+    def __init__(self, log, abort, title, authors, identifiers):
+        Thread.__init__(self)
+        self.daemon = True
+
+        self.log, self.abort = log, abort
+        self.title, self.authors, self.identifiers = (title, authors.
+                identifiers)
+
+        self.results = []
+        self.error = None
+
+    def run(self):
+        try:
+            self.results = identify(self.log, self.abort, title=self.title,
+                    authors=self.authors, identifiers=self.identifiers)
+            for i, result in enumerate(self.results):
+                result.gui_rank = i
+        except:
+            import traceback
+            self.error = traceback.format_exc()
+
 class IdentifyWidget(QWidget):
 
-    def __init__(self, parent=None):
+    def __init__(self, log, parent=None):
         QWidget.__init__(self, parent)
+        self.log = log
+        self.abort = Event()
 
         self.l = l = QGridLayout()
         self.setLayout(l)
@@ -123,7 +162,27 @@ class IdentifyWidget(QWidget):
         self.query.setWordWrap(True)
         l.addWidget(self.query, 2, 0, 1, 2)
 
+        self.comments_view.show_data('<h2>'+_('Downloading')+
+                '<br><span id="dots">.</span></h2>'+
+                '''
+                <script type="text/javascript">
+                window.onload=function(){
+                    var dotspan = document.getElementById('dots');
+                    window.setInterval(function(){
+                        if(dotspan.textContent == '............'){
+                        dotspan.textContent = '.';
+                        }
+                        else{
+                        dotspan.textContent += '.';
+                        }
+                    }, 400);
+                }
+                </script>
+                ''')
+
     def start(self, title=None, authors=None, identifiers={}):
+        self.log.clear()
+        self.log('Starting download')
         parts = []
         if title:
             parts.append('title:'+title)
@@ -133,28 +192,18 @@ class IdentifyWidget(QWidget):
             x = ', '.join('%s:%s'%(k, v) for k, v in identifiers)
             parts.append(x)
         self.query.setText(_('Query: ')+'; '.join(parts))
-        self.comments_view.show_data('<h2>'+_('Downloading, please wait')+
-                '<span id="dots">.</span></h2>'+
-                '''
-                <script type="text/javascript">
-                window.onload=function(){
-                    var dotspan = document.getElementById('dots');
-                    window.setInterval(function(){
-                        if(dotspan.textContent == '...'){
-                        dotspan.textContent = '.';
-                        }
-                        else{
-                        dotspan.textContent += '.';
-                        }
-                    }, 500);
-                }
-                </script>
-                ''')
+        self.log(unicode(self.query.text()))
+
+        self.worker = IdentifyWorker(self.log, self.abort, self.title,
+                self.authors, self.identifiers)
+
+        # self.worker.start()
 
 class FullFetch(QDialog): # {{{
 
-    def __init__(self, parent=None):
+    def __init__(self, log, parent=None):
         QDialog.__init__(self, parent)
+        self.log = log
 
         self.setWindowTitle(_('Downloading metadata...'))
         self.setWindowIcon(QIcon(I('metadata.png')))
@@ -168,7 +217,7 @@ class FullFetch(QDialog): # {{{
         l.addWidget(self.bb)
         self.bb.rejected.connect(self.reject)
 
-        self.identify_widget = IdentifyWidget(self)
+        self.identify_widget = IdentifyWidget(log, self)
         self.stack.addWidget(self.identify_widget)
         self.resize(850, 500)
 
@@ -184,6 +233,6 @@ class FullFetch(QDialog): # {{{
 
 if __name__ == '__main__':
     app = QApplication([])
-    d = FullFetch()
+    d = FullFetch(Log())
     d.start(title='great gatsby', authors=['Fitzgerald'])
 
diff --git a/src/calibre/utils/logging.py b/src/calibre/utils/logging.py
index f4b2e6f0b6..45e21ded39 100644
--- a/src/calibre/utils/logging.py
+++ b/src/calibre/utils/logging.py
@@ -14,7 +14,7 @@ import sys, traceback, cStringIO
 from functools import partial
 from threading import RLock
 
-
+from calibre import isbytestring, force_unicode, as_unicode
 
 class Stream(object):
 
@@ -63,15 +63,16 @@ class FileStream(Stream):
 
 class HTMLStream(Stream):
 
+    color = {
+            DEBUG: '<span style="color:green">',
+            INFO:'<span>',
+            WARN: '<span style="color:yellow">',
+            ERROR: '<span style="color:red">'
+            }
+    normal = '</span>'
+
     def __init__(self, stream=sys.stdout):
         Stream.__init__(self, stream)
-        self.color = {
-                      DEBUG: '<span style="color:green">',
-                      INFO:'<span>',
-                      WARN: '<span style="color:yellow">',
-                      ERROR: '<span style="color:red">'
-                      }
-        self.normal = '</span>'
 
     def prints(self, level, *args, **kwargs):
         self.stream.write(self.color[level])
@@ -82,6 +83,43 @@ class HTMLStream(Stream):
     def flush(self):
         self.stream.flush()
 
+class UnicodeHTMLStream(HTMLStream):
+
+    def __init__(self):
+        self.clear()
+
+    def flush(self):
+        pass
+
+    def prints(self, level, *args, **kwargs):
+        col = self.color[level]
+        if col != self.last_col:
+            if self.data:
+                self.data.append(self.normal)
+            self.data.append(col)
+            self.last_col = col
+
+        sep  = kwargs.get(u'sep', u' ')
+        end  = kwargs.get(u'end', u'\n')
+
+        for arg in args:
+            if isbytestring(arg):
+                arg = force_unicode(arg)
+            elif not isinstance(arg, unicode):
+                arg = as_unicode(arg)
+            self.data.append(arg+sep)
+        self.data.append(end)
+
+    def clear(self):
+        self.data = []
+        self.last_col = self.color[INFO]
+
+    @property
+    def html(self):
+        end = self.normal if self.data else u''
+        return u''.join(self.data) + end
+
+
 class Log(object):
 
     DEBUG = DEBUG

From 011403978718034d2817e19ce0b91a20fc766f76 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 6 Apr 2011 22:54:15 -0600
Subject: [PATCH 57/57] ...

---
 src/calibre/gui2/metadata/single_download.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/calibre/gui2/metadata/single_download.py b/src/calibre/gui2/metadata/single_download.py
index 426d0b9e78..049ac611c5 100644
--- a/src/calibre/gui2/metadata/single_download.py
+++ b/src/calibre/gui2/metadata/single_download.py
@@ -116,7 +116,7 @@ class IdentifyWorker(Thread):
         self.daemon = True
 
         self.log, self.abort = log, abort
-        self.title, self.authors, self.identifiers = (title, authors.
+        self.title, self.authors, self.identifiers = (title, authors,
                 identifiers)
 
         self.results = []
@@ -194,8 +194,8 @@ class IdentifyWidget(QWidget):
         self.query.setText(_('Query: ')+'; '.join(parts))
         self.log(unicode(self.query.text()))
 
-        self.worker = IdentifyWorker(self.log, self.abort, self.title,
-                self.authors, self.identifiers)
+        self.worker = IdentifyWorker(self.log, self.abort, title,
+                authors, identifiers)
 
         # self.worker.start()