From 1d84c0d6ac90f04bcadbea0dffec75f1d38677db Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 3 Apr 2011 13:52:59 -0600 Subject: [PATCH 01/50] developpez.com by louhike --- recipes/developpez.recipe | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 recipes/developpez.recipe diff --git a/recipes/developpez.recipe b/recipes/developpez.recipe new file mode 100644 index 0000000000..707e702c0a --- /dev/null +++ b/recipes/developpez.recipe @@ -0,0 +1,21 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1301849956(BasicNewsRecipe): + title = u'Developpez.com' + description = u'Toutes les news du site Developpez.com' + publisher = u'Developpez.com' + timefmt = ' [%a, %d %b, %Y]' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + encoding = 'ISO-8859-1' + language = 'fr' + __author__ = 'louhike' + remove_javascript = True + keep_only_tags = [dict(name='div', attrs={'class':'content'})] + + feeds = [(u'Tous les articles', u'http://www.developpez.com/index/rss')] + + def get_cover_url(self): + return 'http://javascript.developpez.com/template/images/logo.gif' + From 492d16e5c996418ec311cfc5a1a2462b1889eaea Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 3 Apr 2011 14:31:14 -0600 Subject: [PATCH 02/50] ODT Input: Fix handling of the element. Fixes #749655 (Private bug) --- src/odf/odf2xhtml.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/odf/odf2xhtml.py b/src/odf/odf2xhtml.py index 390d407d16..26da9d9905 100644 --- a/src/odf/odf2xhtml.py +++ b/src/odf/odf2xhtml.py @@ -1386,12 +1386,19 @@ ol, ul { padding-left: 2em; } self.purgedata() def s_text_s(self, tag, attrs): - """ Generate a number of spaces. ODF has an element; HTML uses   - We use   so we can send the output through an XML parser if we desire to + # Changed by Kovid to fix non breaking spaces being prepended to + # element instead of being part of the text flow. + # We don't use an entity for the nbsp as the contents of self.data will + # be escaped on writeout. + """ Generate a number of spaces. We use the non breaking space for + the text:s ODF element. """ - c = attrs.get( (TEXTNS,'c'),"1") - for x in xrange(int(c)): - self.writeout(' ') + try: + c = int(attrs.get((TEXTNS, 'c'), 1)) + except: + c = 0 + if c > 0: + self.data.append(u'\u00a0'*c) def s_text_span(self, tag, attrs): """ The element matches the element in HTML. 
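# A standalone restatement of the s_text_s change above: turn the
# element's optional text:c count into non-breaking spaces appended to
# the text flow, instead of writing raw spaces out of band. TEXTNS is
# the standard ODF text namespace; expand_text_s is a made-up helper
# name for illustration, not part of odf2xhtml.
TEXTNS = 'urn:oasis:names:tc:opendocument:xmlns:text:1.0'

def expand_text_s(attrs):
    # text:s means a single space when the text:c count is absent
    try:
        c = int(attrs.get((TEXTNS, 'c'), 1))
    except (TypeError, ValueError):
        c = 0
    # U+00A0 survives HTML whitespace collapsing and is escaped
    # correctly when the accumulated text is written out
    return u'\u00a0' * c if c > 0 else u''

# expand_text_s({(TEXTNS, 'c'): '3'}) -> u'\xa0\xa0\xa0'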
It is From 6be7471d2e7d93793de6e25e7e9222cb82b49cc4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 08:02:28 -0600 Subject: [PATCH 03/50] F-Secure by louhike and more work on the new metadata download system --- recipes/f_secure.recipe | 22 +++ src/calibre/ebooks/metadata/sources/amazon.py | 1 + src/calibre/ebooks/metadata/sources/base.py | 6 + .../ebooks/metadata/sources/identify.py | 172 ++++++++++++++++-- 4 files changed, 186 insertions(+), 15 deletions(-) create mode 100644 recipes/f_secure.recipe diff --git a/recipes/f_secure.recipe b/recipes/f_secure.recipe new file mode 100644 index 0000000000..f276a4961a --- /dev/null +++ b/recipes/f_secure.recipe @@ -0,0 +1,22 @@ +from calibre.web.feeds.news import BasicNewsRecipe + +class AdvancedUserRecipe1301860159(BasicNewsRecipe): + title = u'F-Secure Weblog' + language = 'en' + __author__ = 'louhike' + description = u'All the news from the weblog of F-Secure' + publisher = u'F-Secure' + timefmt = ' [%a, %d %b, %Y]' + encoding = 'ISO-8859-1' + oldest_article = 7 + max_articles_per_feed = 100 + no_stylesheets = True + use_embedded_content = False + language = 'en_EN' + remove_javascript = True + keep_only_tags = [dict(name='div', attrs={'class':'modSectionTd2'})] + remove_tags = [dict(name='a'),dict(name='hr')] + + feeds = [(u'Weblog', u'http://www.f-secure.com/weblog/weblog.rss')] + def get_cover_url(self): + return 'http://www.f-secure.com/weblog/archives/images/company_logo.png' diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index cfa2b09ea8..9334d818ec 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -282,6 +282,7 @@ class Amazon(Source): capabilities = frozenset(['identify', 'cover']) touched_fields = frozenset(['title', 'authors', 'identifier:amazon', 'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate']) + has_html_comments = True AMAZON_DOMAINS = { 'com': _('US'), diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 7cc4ed3518..08012c3ee8 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -18,6 +18,9 @@ from calibre.utils.titlecase import titlecase from calibre.ebooks.metadata import check_isbn msprefs = JSONConfig('metadata_sources.json') +msprefs.defaults['txt_comments'] = False +msprefs.defaults['ignore_fields'] = [] +msprefs.defaults['max_tags'] = 10 def create_log(ostream=None): log = ThreadSafeLog(level=ThreadSafeLog.DEBUG) @@ -104,6 +107,9 @@ class Source(Plugin): #: during the identify phase touched_fields = frozenset() + #: Set this to True if your plugin return HTML formatted comments + has_html_comments = False + def __init__(self, *args, **kwargs): Plugin.__init__(self, *args, **kwargs) self._isbn_to_identifier_cache = {} diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index 1d4d8840e8..ab86e8ffa2 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -8,13 +8,18 @@ __copyright__ = '2011, Kovid Goyal ' __docformat__ = 'restructuredtext en' import time +from datetime import datetime from Queue import Queue, Empty from threading import Thread from io import BytesIO +from operator import attrgetter from calibre.customize.ui import metadata_plugins -from calibre.ebooks.metadata.sources.base import create_log +from 
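# The msprefs defaults added above live in a calibre JSONConfig: a
# dict persisted to a JSON file whose lookups fall back to a .defaults
# mapping, so new settings get sane values without migrating existing
# user config files. A rough in-memory stand-in for that behaviour
# (not the real JSONConfig implementation):
class PrefsSketch(dict):

    def __init__(self):
        dict.__init__(self)
        self.defaults = {}

    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return self.defaults[key]

msprefs = PrefsSketch()
msprefs.defaults['txt_comments'] = False  # convert HTML comments to text
msprefs.defaults['ignore_fields'] = []    # fields to blank after download
msprefs.defaults['max_tags'] = 10         # cap on merged tags per book
print msprefs['max_tags']                 # -> 10, served from defaults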
calibre.ebooks.metadata.sources.base import create_log, msprefs from calibre.ebooks.metadata.xisbn import xisbn +from calibre.ebooks.metadata.book.base import Metadata +from calibre.utils.date import utc_tz +from calibre.utils.html2text import html2text # How long to wait for more results after first result is found WAIT_AFTER_FIRST_RESULT = 30 # seconds @@ -117,14 +122,30 @@ def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30): log('Merging results from different sources and finding earliest', 'publication dates') start_time = time.time() - merged_results = merge_identify_results(results, log) + results = merge_identify_results(results, log) log('We have %d merged results, merging took: %.2f seconds' % - (len(merged_results), time.time() - start_time)) + (len(results), time.time() - start_time)) + + if msprefs['txt_comments']: + for r in results: + if r.plugin.has_html_comments and r.comments: + r.comments = html2text(r.comments) + + dummy = Metadata(_('Unknown')) + max_tags = msprefs['max_tags'] + for f in msprefs['ignore_fields']: + for r in results: + setattr(r, f, getattr(dummy, f)) + r.tags = r.tags[:max_tags] + + return results + class ISBNMerge(object): def __init__(self): self.pools = {} + self.isbnless_results = [] def isbn_in_pool(self, isbn): if isbn: @@ -140,22 +161,143 @@ class ISBNMerge(object): return True return False - def add_result(self, result, isbn): - pool = self.isbn_in_pool(isbn) - if pool is None: - isbns, min_year = xisbn.get_isbn_pool(isbn) - if not isbns: - isbns = frozenset([isbn]) - self.pool[isbns] = pool = (min_year, []) + def add_result(self, result): + isbn = result.isbn + if isbn: + pool = self.isbn_in_pool(isbn) + if pool is None: + isbns, min_year = xisbn.get_isbn_pool(isbn) + if not isbns: + isbns = frozenset([isbn]) + self.pool[isbns] = pool = (min_year, []) + + if not self.pool_has_result_from_same_source(pool, result): + pool[1].append(result) + else: + self.isbnless_results.append(result) + + def finalize(self): + has_isbn_result = False + for results in self.pools.itervalues(): + if results: + has_isbn_result = True + break + self.has_isbn_result = has_isbn_result + + if has_isbn_result: + self.merge_isbn_results() + else: + self.results = sorted(self.isbnless_results, + key=attrgetter('relevance_in_source')) + + return self.results + + def merge_isbn_results(self): + self.results = [] + for min_year, results in self.pool.itervalues(): + if results: + self.results.append(self.merge(results, min_year)) + + self.results.sort(key=attrgetter('average_source_relevance')) + + def length_merge(self, attr, results, null_value=None, shortest=True): + values = [getattr(x, attr) for x in results if not x.is_null(attr)] + values = [x for x in values if len(x) > 0] + if not values: + return null_value + values.sort(key=len, reverse=not shortest) + return values[0] + + def random_merge(self, attr, results, null_value=None): + values = [getattr(x, attr) for x in results if not x.is_null(attr)] + return values[0] if values else null_value + + def merge(self, results, min_year): + ans = Metadata(_('Unknown')) + + # We assume the shortest title has the least cruft in it + ans.title = self.length_merge('title', results, null_value=ans.title) + + # No harm in having extra authors, maybe something useful like an + # editor or translator + ans.authors = self.length_merge('authors', results, + null_value=ans.authors, shortest=False) + + # We assume the shortest publisher has the least cruft in it + ans.publisher = 
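# The run of length_merge calls in this merge() encodes one heuristic
# per field: shortest title/publisher/tags (least cruft), longest
# authors/series/comments (most information). Stripped of the Metadata
# plumbing it is just a length sort; the sample data below is made up:
def pick_by_length(values, shortest=True):
    values = [v for v in values if v]  # drop null and empty candidates
    if not values:
        return None
    return min(values, key=len) if shortest else max(values, key=len)

titles = ['The Hobbit', 'The Hobbit: 75th Anniversary Edition']
print pick_by_length(titles)                   # -> 'The Hobbit'
authors = [['J. R. R. Tolkien'],
           ['J. R. R. Tolkien', 'Douglas A. Anderson']]
print pick_by_length(authors, shortest=False)  # keeps editor/annotator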
self.length_merge('publisher', results, + null_value=ans.publisher) + + # We assume the smallest set of tags has the least cruft in it + ans.tags = self.length_merge('tags', results, + null_value=ans.tags) + + # We assume the longest series has the most info in it + ans.series = self.length_merge('series', results, + null_value=ans.series, shortest=False) + for r in results: + if r.series and r.series == ans.series: + ans.series_index = r.series_index + break + + # Average the rating over all sources + ratings = [] + for r in results: + rating = r.rating + if rating and rating > 0 and rating <= 5: + ratings.append(rating) + if ratings: + ans.rating = sum(ratings)/len(ratings) + + # Smallest language is likely to be valid + ans.language = self.length_merge('language', results, + null_value=ans.language) + + # Choose longest comments + ans.comments = self.length_merge('comments', results, + null_value=ans.comments, shortest=False) + + # Published date + if min_year: + min_date = datetime(min_year, 1, 2, tzinfo=utc_tz) + ans.pubdate = min_date + else: + min_date = datetime(10000, 1, 1, tzinfo=utc_tz) + for r in results: + if r.pubdate is not None and r.pubdate < min_date: + min_date = r.pubdate + if min_date.year < 10000: + ans.pubdate = min_date + + # Identifiers + for r in results: + ans.identifiers.update(r.identifiers) + + # Merge any other fields with no special handling (random merge) + touched_fields = set() + for r in results: + touched_fields |= r.plugin.touched_fields + + for f in touched_fields: + if f.startswith('identifier:') or not ans.is_null(f): + continue + setattr(ans, f, self.random_merge(f, results, + null_value=getattr(ans, f))) + + avg = [x.relevance_in_source for x in results] + avg = sum(avg)/len(avg) + ans.average_source_relevance = avg + + return ans - if not self.pool_has_result_from_same_source(pool, result): - pool[1].append(result) def merge_identify_results(result_map, log): + isbn_merge = ISBNMerge() for plugin, results in result_map.iteritems(): for result in results: - isbn = result.isbn - if isbn: - isbns, min_year = xisbn.get_isbn_pool(isbn) + isbn_merge.add_result(result) + + return isbn_merge.finalize() + + From 7599a89c472d92cd29afdbf33f7c6faa7526211c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 09:32:32 -0600 Subject: [PATCH 04/50] Fix #750336 (Pocketbook 602/902 2.0.6 FW won't connect) --- src/calibre/devices/eb600/driver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/calibre/devices/eb600/driver.py b/src/calibre/devices/eb600/driver.py index 5374c6c4e2..01277980db 100644 --- a/src/calibre/devices/eb600/driver.py +++ b/src/calibre/devices/eb600/driver.py @@ -244,7 +244,8 @@ class POCKETBOOK602(USBMS): BCD = [0x0324] VENDOR_NAME = '' - WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902', 'PB903'] + WINDOWS_MAIN_MEM = WINDOWS_CARD_A_MEM = ['PB602', 'PB603', 'PB902', + 'PB903', 'PB'] class POCKETBOOK701(USBMS): From 4b7bc8ce365d99a87ce03cd614b3e8e3f5fceb62 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 10:04:51 -0600 Subject: [PATCH 05/50] Fix #750288 (TimesofIndia news fetch not working) --- recipes/toi.recipe | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/recipes/toi.recipe b/recipes/toi.recipe index 643d120a36..8a772b6f9d 100644 --- a/recipes/toi.recipe +++ b/recipes/toi.recipe @@ -1,3 +1,4 @@ +import re from calibre.web.feeds.news import BasicNewsRecipe class TimesOfIndia(BasicNewsRecipe): @@ -8,10 +9,10 @@ class 
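# The published-date merge() added in PATCH 03 above keeps the ISBN
# pool's min_year when available and otherwise scans for the earliest
# date any source reported, behind a far-future sentinel. One caveat:
# the stdlib datetime is capped at year 9999 (datetime.MAXYEAR), so
# constructing a year-10000 sentinel raises ValueError; datetime.max
# is safer. A sketch of the same logic, tzinfo dropped for brevity:
from datetime import datetime

def earliest_pubdate(dates, min_year=None):
    if min_year:
        # the ISBN pool supplied an earliest known publication year
        return datetime(min_year, 1, 2)
    earliest = datetime.max
    for d in dates:
        if d is not None and d < earliest:
            earliest = d
    return None if earliest == datetime.max else earliest

# earliest_pubdate([datetime(2007, 3, 1), None, datetime(2005, 6, 9)])
# -> datetime(2005, 6, 9)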
TimesOfIndia(BasicNewsRecipe): max_articles_per_feed = 25 no_stylesheets = True - keep_only_tags = [dict(attrs={'class':'maintable12'})] + keep_only_tags = [{'class':['maintable12', 'prttabl']}] remove_tags = [ dict(style=lambda x: x and 'float' in x), - dict(attrs={'class':'prvnxtbg'}), + {'class':['prvnxtbg', 'footbdrin', 'bcclftr']}, ] feeds = [ @@ -38,8 +39,28 @@ class TimesOfIndia(BasicNewsRecipe): ('Most Read', 'http://timesofindia.indiatimes.com/rssfeedmostread.cms') ] - def print_version(self, url): - return url + '?prtpage=1' + + def get_article_url(self, article): + url = BasicNewsRecipe.get_article_url(self, article) + if '/0Ltimesofindia' in url: + url = url.partition('/0L')[-1] + url = url.replace('0B', '.').replace('0N', '.com').replace('0C', + '/').replace('0E', '-') + url = 'http://' + url.rpartition('/')[0] + match = re.search(r'/([0-9a-zA-Z]+?)\.cms', url) + if match is not None: + num = match.group(1) + num = re.sub(r'[^0-9]', '', num) + return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' % + num) + else: + cms = re.search(r'/(\d+)\.cms', url) + if cms is not None: + return ('http://timesofindia.indiatimes.com/articleshow/%s.cms?prtpage=1' % + cms.group(1)) + + return url + def preprocess_html(self, soup): return soup From 7d1c706835bbc17990596804e232233272fc5796 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 10:41:43 -0600 Subject: [PATCH 06/50] Fix #750101 (Private bug) --- src/calibre/ebooks/pdf/fonts.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/pdf/fonts.cpp b/src/calibre/ebooks/pdf/fonts.cpp index 99ab7517c1..9b9e7708a3 100644 --- a/src/calibre/ebooks/pdf/fonts.cpp +++ b/src/calibre/ebooks/pdf/fonts.cpp @@ -72,6 +72,7 @@ XMLFont::XMLFont(string* font_name, double size, GfxRGB rgb) : size(size-1), line_size(-1.0), italic(false), bold(false), font_name(font_name), font_family(NULL), color(rgb) { + if (!this->font_name) this->font_name = new string(DEFAULT_FONT_FAMILY); this->font_family = family_name(this->font_name); if (strcasestr(font_name->c_str(), "bold")) this->bold = true; @@ -134,7 +135,15 @@ Fonts::size_type Fonts::add_font(XMLFont *f) { } Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) { - XMLFont *f = new XMLFont(font_name, size, rgb); + XMLFont *f = NULL; + if (font_name == NULL) { + string *fn = new string("Unknown"); + f = new XMLFont(fn, size, rgb); + // fn must not be deleted + } else { + f = new XMLFont(font_name, size, rgb); + } + return this->add_font(f); } From 83175da4b297af6c46954ded3b4cd4f476302104 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 10:59:57 -0600 Subject: [PATCH 07/50] ... 
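# The get_article_url rewrite in PATCH 05 above reverses the feed
# redirector's character escaping (0B -> '.', 0N -> '.com', 0C -> '/',
# 0E -> '-') and then rebuilds a direct print URL from the numeric
# articleshow id. The same steps standalone; the sample link is a
# hypothetical one of the shape the recipe expects:
import re

def toi_print_url(url):
    if '/0L' in url:
        url = url.partition('/0L')[-1]
        url = (url.replace('0B', '.').replace('0N', '.com')
                  .replace('0C', '/').replace('0E', '-'))
        url = 'http://' + url.rpartition('/')[0]
    m = re.search(r'/(\d+)\.cms', url)
    if m is not None:
        return ('http://timesofindia.indiatimes.com/articleshow/'
                '%s.cms?prtpage=1' % m.group(1))
    return url

# toi_print_url('http://r.example/0Ltimesofindia0Bindiatimes0N0C'
#               'articleshow0C12345670Bcms/story01.htm')
# -> 'http://timesofindia.indiatimes.com/articleshow/1234567.cms?prtpage=1'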
--- src/calibre/ebooks/pdf/fonts.cpp | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/calibre/ebooks/pdf/fonts.cpp b/src/calibre/ebooks/pdf/fonts.cpp index 9b9e7708a3..c5261298ff 100644 --- a/src/calibre/ebooks/pdf/fonts.cpp +++ b/src/calibre/ebooks/pdf/fonts.cpp @@ -136,13 +136,9 @@ Fonts::size_type Fonts::add_font(XMLFont *f) { Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) { XMLFont *f = NULL; - if (font_name == NULL) { - string *fn = new string("Unknown"); - f = new XMLFont(fn, size, rgb); - // fn must not be deleted - } else { - f = new XMLFont(font_name, size, rgb); - } + if (font_name == NULL) + font_name = new string("Unknown"); + f = new XMLFont(font_name, size, rgb); return this->add_font(f); } From 3e1a43e86a50f06d7f71291825b3475db0d73de8 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 11:00:20 -0600 Subject: [PATCH 08/50] ... --- src/calibre/ebooks/pdf/fonts.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/calibre/ebooks/pdf/fonts.cpp b/src/calibre/ebooks/pdf/fonts.cpp index c5261298ff..c3a709869e 100644 --- a/src/calibre/ebooks/pdf/fonts.cpp +++ b/src/calibre/ebooks/pdf/fonts.cpp @@ -138,6 +138,7 @@ Fonts::size_type Fonts::add_font(string* font_name, double size, GfxRGB rgb) { XMLFont *f = NULL; if (font_name == NULL) font_name = new string("Unknown"); + // font_name must not be deleted f = new XMLFont(font_name, size, rgb); return this->add_font(f); From d1859b0f784e972e0ff8af16e7b1afbb9f455c4d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 11:14:12 -0600 Subject: [PATCH 09/50] ... --- src/calibre/ebooks/metadata/sources/base.py | 1 + .../ebooks/metadata/sources/identify.py | 194 +++++++++--------- 2 files changed, 100 insertions(+), 95 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py index 08012c3ee8..d306a02bcb 100644 --- a/src/calibre/ebooks/metadata/sources/base.py +++ b/src/calibre/ebooks/metadata/sources/base.py @@ -21,6 +21,7 @@ msprefs = JSONConfig('metadata_sources.json') msprefs.defaults['txt_comments'] = False msprefs.defaults['ignore_fields'] = [] msprefs.defaults['max_tags'] = 10 +msprefs.defaults['wait_after_first_identify_result'] = 30 # seconds def create_log(ostream=None): log = ThreadSafeLog(level=ThreadSafeLog.DEBUG) diff --git a/src/calibre/ebooks/metadata/sources/identify.py b/src/calibre/ebooks/metadata/sources/identify.py index ab86e8ffa2..87d34c0bff 100644 --- a/src/calibre/ebooks/metadata/sources/identify.py +++ b/src/calibre/ebooks/metadata/sources/identify.py @@ -21,9 +21,7 @@ from calibre.ebooks.metadata.book.base import Metadata from calibre.utils.date import utc_tz from calibre.utils.html2text import html2text -# How long to wait for more results after first result is found -WAIT_AFTER_FIRST_RESULT = 30 # seconds - +# Download worker {{{ class Worker(Thread): def __init__(self, plugin, kwargs, abort): @@ -47,99 +45,9 @@ def is_worker_alive(workers): return True return False -def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30): - start_time = time.time() - plugins = list(metadata_plugins['identify']) - - kwargs = { - 'title': title, - 'authors': authors, - 'identifiers': identifiers, - 'timeout': timeout, - } - - log('Running identify query with parameters:') - log(kwargs) - log('Using plugins:', ', '.join([p.name for p in plugins])) - log('The log (if any) from individual plugins is below') - - workers = [Worker(p, kwargs, abort) for p in plugins] - 
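# PATCH 09 reshuffles the download machinery around the Worker threads
# introduced in PATCH 03: each metadata source runs in its own thread
# and pushes results onto a per-worker Queue that identify() drains.
# A trimmed-down version of that pattern; the plugin identify()
# signature is simplified here relative to the real plugin API:
from Queue import Queue
from threading import Thread

class WorkerSketch(Thread):

    def __init__(self, plugin, kwargs, abort):
        Thread.__init__(self)
        self.daemon = True
        self.plugin, self.kwargs, self.abort = plugin, kwargs, abort
        self.rq = Queue()  # results land here, one Metadata per item

    def run(self):
        try:
            self.plugin.identify(self.rq, self.abort, **self.kwargs)
        except:
            pass  # the real Worker logs the traceback to the plugin buffer

def is_worker_alive(workers):
    return any(w.is_alive() for w in workers)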
for w in workers: - w.start() - - first_result_at = None - results = dict.fromkeys(plugins, []) - - def get_results(): - found = False - for w in workers: - try: - result = w.rq.get_nowait() - except Empty: - pass - else: - results[w.plugin].append(result) - found = True - return found - - while True: - time.sleep(0.2) - - if get_results() and first_result_at is None: - first_result_at = time.time() - - if not is_worker_alive(workers): - break - - if (first_result_at is not None and time.time() - first_result_at < - WAIT_AFTER_FIRST_RESULT): - log('Not waiting any longer for more results') - abort.set() - break - - get_results() - sort_kwargs = dict(kwargs) - for k in list(sort_kwargs.iterkeys()): - if k not in ('title', 'authors', 'identifiers'): - sort_kwargs.pop(k) - - for plugin, results in results.iteritems(): - results.sort(key=plugin.identify_results_keygen(**sort_kwargs)) - plog = plugin.buf.getvalue().strip() - if plog: - log('\n'+'*'*35, plugin.name, '*'*35) - log('Found %d results'%len(results)) - log(plog) - log('\n'+'*'*80) - - for i, result in enumerate(results): - result.relevance_in_source = i - result.has_cached_cover_url = \ - plugin.get_cached_cover_url(result.identifiers) is not None - result.identify_plugin = plugin - - log('The identify phase took %.2f seconds'%(time.time() - start_time)) - log('Merging results from different sources and finding earliest', - 'publication dates') - start_time = time.time() - results = merge_identify_results(results, log) - log('We have %d merged results, merging took: %.2f seconds' % - (len(results), time.time() - start_time)) - - if msprefs['txt_comments']: - for r in results: - if r.plugin.has_html_comments and r.comments: - r.comments = html2text(r.comments) - - dummy = Metadata(_('Unknown')) - max_tags = msprefs['max_tags'] - for f in msprefs['ignore_fields']: - for r in results: - setattr(r, f, getattr(dummy, f)) - r.tags = r.tags[:max_tags] - - return results +# }}} +# Merge results from different sources {{{ class ISBNMerge(object): @@ -298,6 +206,102 @@ def merge_identify_results(result_map, log): return isbn_merge.finalize() +# }}} + +def identify(log, abort, title=None, authors=None, identifiers=[], timeout=30): + start_time = time.time() + plugins = list(metadata_plugins['identify']) + + kwargs = { + 'title': title, + 'authors': authors, + 'identifiers': identifiers, + 'timeout': timeout, + } + + log('Running identify query with parameters:') + log(kwargs) + log('Using plugins:', ', '.join([p.name for p in plugins])) + log('The log (if any) from individual plugins is below') + + workers = [Worker(p, kwargs, abort) for p in plugins] + for w in workers: + w.start() + + first_result_at = None + results = dict.fromkeys(plugins, []) + + def get_results(): + found = False + for w in workers: + try: + result = w.rq.get_nowait() + except Empty: + pass + else: + results[w.plugin].append(result) + found = True + return found + + wait_time = msprefs['wait_after_first_identify_result'] + while True: + time.sleep(0.2) + + if get_results() and first_result_at is None: + first_result_at = time.time() + + if not is_worker_alive(workers): + break + + if (first_result_at is not None and time.time() - first_result_at < + wait_time): + log('Not waiting any longer for more results') + abort.set() + break + + get_results() + sort_kwargs = dict(kwargs) + for k in list(sort_kwargs.iterkeys()): + if k not in ('title', 'authors', 'identifiers'): + sort_kwargs.pop(k) + + for plugin, results in results.iteritems(): + 
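# Condensed form of the result-gathering loop in this hunk: wake every
# 200ms, drain the worker queues, and once the first result arrives
# give the remaining sources only a bounded grace period. One caveat:
# the cutoff should fire when the elapsed time since the first result
# EXCEEDS the wait, so the '<' comparison shown in the hunks above and
# below appears inverted and would abort almost immediately; the
# sketch uses '>' to match the stated intent of the pref:
import time

def gather_results(workers, get_results, abort, log, wait_time):
    first_result_at = None
    while True:
        time.sleep(0.2)
        if get_results() and first_result_at is None:
            first_result_at = time.time()
        if not any(w.is_alive() for w in workers):
            break
        if (first_result_at is not None and
                time.time() - first_result_at > wait_time):
            log('Not waiting any longer for more results')
            abort.set()
            break
    get_results()  # final drain after the workers stop or abort fires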
results.sort(key=plugin.identify_results_keygen(**sort_kwargs)) + plog = plugin.buf.getvalue().strip() + if plog: + log('\n'+'*'*35, plugin.name, '*'*35) + log('Found %d results'%len(results)) + log(plog) + log('\n'+'*'*80) + + for i, result in enumerate(results): + result.relevance_in_source = i + result.has_cached_cover_url = \ + plugin.get_cached_cover_url(result.identifiers) is not None + result.identify_plugin = plugin + + log('The identify phase took %.2f seconds'%(time.time() - start_time)) + log('Merging results from different sources and finding earliest', + 'publication dates') + start_time = time.time() + results = merge_identify_results(results, log) + log('We have %d merged results, merging took: %.2f seconds' % + (len(results), time.time() - start_time)) + + if msprefs['txt_comments']: + for r in results: + if r.plugin.has_html_comments and r.comments: + r.comments = html2text(r.comments) + + dummy = Metadata(_('Unknown')) + max_tags = msprefs['max_tags'] + for f in msprefs['ignore_fields']: + for r in results: + setattr(r, f, getattr(dummy, f)) + r.tags = r.tags[:max_tags] + + return results + From ac3693cfdc586b6c3f89bb5841d6fc881d3c6b7c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 4 Apr 2011 11:36:26 -0600 Subject: [PATCH 10/50] Conversion pipeline: Handle inline