From ca5703b250753a728c120099fb3053d6b940e64a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 10:44:56 -0600
Subject: [PATCH 01/15] ...

---
 src/calibre/gui2/main_window.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/calibre/gui2/main_window.py b/src/calibre/gui2/main_window.py
index ec58dd3856..134aae3ad1 100644
--- a/src/calibre/gui2/main_window.py
+++ b/src/calibre/gui2/main_window.py
@@ -20,7 +20,8 @@ Usage: %prog [options]
 Launch the Graphical User Interface
 '''):
     parser = OptionParser(usage)
-    parser.add_option('--redirect-console-output', default=False, action='store_true', dest='redirect',
+    # The b is required because of a regression in optparse.py in python 2.7.0
+    parser.add_option(b'--redirect-console-output', default=False, action='store_true', dest='redirect',
                       help=_('Redirect console output to a dialog window (both stdout and stderr). Useful on windows where GUI apps do not have a output streams.'))
     return parser
 

From 1edc4f5a800473d80196139b502ce63bb7d70c1a Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 10:49:10 -0600
Subject: [PATCH 02/15] More work on the new Amazon metadata download plugin

---
 src/calibre/ebooks/metadata/sources/amazon.py | 120 +++++++++++++++++-
 src/calibre/manual/faq.rst                    |   4 +-
 2 files changed, 120 insertions(+), 4 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index a62a9683cb..33ea24c421 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -7,7 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
 __docformat__ = 'restructuredtext en'
 
-import socket, time
+import socket, time, re
 from urllib import urlencode
 from threading import Thread
 
@@ -18,9 +18,15 @@ from calibre.ebooks.metadata import check_isbn
 from calibre.ebooks.metadata.sources.base import Source
 from calibre.utils.cleantext import clean_ascii_chars
 from calibre.ebooks.chardet import xml_to_unicode
+from calibre.ebooks.metadata.book.base import Metadata
+from calibre.library.comments import sanitize_comments_html
 
 class Worker(Thread):
 
+    '''
+    Get book details from Amazon's book page in a separate thread
+    '''
+
     def __init__(self, url, result_queue, browser, log, timeout=20):
         self.url, self.result_queue = url, result_queue
         self.log, self.timeout = log, timeout
@@ -75,7 +81,117 @@ class Worker(Thread):
         self.parse_details(root)
 
     def parse_details(self, root):
-        pass
+        try:
+            asin = self.parse_asin(root)
+        except:
+            self.log.exception('Error parsing asin for url: %r'%self.url)
+            asin = None
+
+        try:
+            title = self.parse_title(root)
+        except:
+            self.log.exception('Error parsing title for url: %r'%self.url)
+            title = None
+
+        try:
+            authors = self.parse_authors(root)
+        except:
+            self.log.exception('Error parsing authors for url: %r'%self.url)
+            authors = []
+
+
+        if not title or not authors or not asin:
+            self.log.error('Could not find title/authors/asin for %r'%self.url)
+            self.log.error('ASIN: %r Title: %r Authors: %r'%(asin, title,
+                authors))
+            return
+
+        mi = Metadata(title, authors)
+        mi.set_identifier('amazon', asin)
+        self.amazon_id = asin
+
+        try:
+            mi.rating = self.parse_ratings(root)
+        except:
+            self.log.exception('Error parsing ratings for url: %r'%self.url)
+
+        try:
+            mi.comments = self.parse_comments(root)
+        except:
+            self.log.exception('Error parsing comments for url: %r'%self.url)
+
+        try:
+            self.cover_url = self.parse_cover(root)
+        except:
+            self.log.exception('Error parsing cover for url: %r'%self.url)
+
+        self.result_queue.put(mi)
+
+    def parse_asin(self, root):
+        link = root.xpath('//link[@rel="canonical" and @href]')
+        for l in link:
+            return l.get('href').rpartition('/')[-1]
+
+    def parse_title(self, root):
+        tdiv = root.xpath('//h1[@class="parseasinTitle"]')[0]
+        actual_title = tdiv.xpath('descendant::*[@id="btAsinTitle"]')
+        if actual_title:
+            title = tostring(actual_title[0], encoding=unicode,
+                    method='text').strip()
+        else:
+            title = tostring(tdiv, encoding=unicode, method='text').strip()
+        return re.sub(r'[([].*[)]]', '', title).strip()
+
+    def parse_authors(self, root):
+        bdiv = root.xpath('//div[@class="buying"]')[0]
+        aname = bdiv.xpath('descendant::span[@class="contributorNameTrigger"]')
+        authors = [tostring(x, encoding=unicode, method='text').strip() for x
+                in aname]
+        return authors
+
+    def parse_ratings(self, root):
+        ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
+        pat = re.compile(r'([0-9.]+) out of (\d+) stars')
+        if ratings:
+            for elem in ratings[0].xpath('descendant::*[@title]'):
+                t = elem.get('title')
+                m = pat.match(t)
+                if m is not None:
+                    try:
+                        return float(m.group(1))/float(m.group(2)) * 5
+                    except:
+                        pass
+
+    def parse_comments(self, root):
+        desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
+        if desc:
+            desc = desc[0]
+            for c in desc.xpath('descendant::*[@class="seeAll" or'
+                    ' @class="emptyClear" or @href]'):
+                c.getparent().remove(c)
+            desc = tostring(desc, method='html', encoding=unicode).strip()
+            # remove all attributes from tags
+            desc = re.sub(r'<([a-zA-Z0-9]+)\s[^>]+>', r'<\1>', desc)
+            # Collapse whitespace
+            #desc = re.sub('\n+', '\n', desc)
+            #desc = re.sub(' +', ' ', desc)
+            # Remove the notice about text referring to out of print editions
+            desc = re.sub(r'(?s)<em>--This text ref.*?</em>', '', desc)
+            # Remove comments
+            desc = re.sub(r'(?s)<!--.*?-->', '', desc)
+            return sanitize_comments_html(desc)
+
+    def parse_cover(self, root):
+        imgs = root.xpath('//img[@id="prodImage" and @src]')
+        if imgs:
+            src = imgs[0].get('src')
+            parts = src.split('/')
+            if len(parts) > 3:
+                bn = parts[-1]
+                sparts = bn.split('_')
+                if len(sparts) > 2:
+                    bn = sparts[0] + sparts[-1]
+                    return ('/'.join(parts[:-1]))+'/'+bn
 
 
 class Amazon(Source):
diff --git a/src/calibre/manual/faq.rst b/src/calibre/manual/faq.rst
index a3d4332fd0..948611f775 100644
--- a/src/calibre/manual/faq.rst
+++ b/src/calibre/manual/faq.rst
@@ -508,9 +508,9 @@ You have two choices:
  1. Create a patch by hacking on |app| and send it to me for review and inclusion. See `Development <http://calibre-ebook.com/get-involved>`_.
  2. `Open a ticket <http://bugs.calibre-ebook.com/newticket>`_ (you have to register and login first). Remember that |app| development is done by volunteers, so if you get no response to your feature request, it means no one feels like implementing it.
 
-Can I include |app| on a CD to be distributed with my product/magazine?
+How is |app| licensed?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-|app| is licensed under the GNU General Public License v3 (an open source license). This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `from googlecode <http://code.google.com/p/calibre-ebook/downloads/list>`_.
+|app| is licensed under the GNU General Public License v3 (an open source license). This means that you are free to redistribute |app| as long as you make the source code available. So if you want to put |app| on a CD with your product, you must also put the |app| source code on the CD. The source code is available for download `from googlecode <http://code.google.com/p/calibre-ebook/downloads/list>`_. You are free to use the results of conversions from |app| however you want. You cannot use code or libraries from |app| in your software without making your software open source. For details, see `The GNU GPL v3 <http://www.gnu.org/licenses/gpl.html>`_.
 
 How do I run calibre from my USB stick?
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

From 103d994f176762c715c6fc1a88402356a44672bc Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 10:52:06 -0600
Subject: [PATCH 03/15] Fix #9418 (Calibre 0.7.49 not detecting Verizon
 Motorola Droid 2)

---
 src/calibre/devices/android/driver.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/devices/android/driver.py b/src/calibre/devices/android/driver.py
index 0491f34d78..1ddc14bd1f 100644
--- a/src/calibre/devices/android/driver.py
+++ b/src/calibre/devices/android/driver.py
@@ -98,7 +98,7 @@ class ANDROID(USBMS):
             'SCH-I500_CARD', 'SPH-D700_CARD', 'MB810', 'GT-P1000', 'DESIRE',
             'SGH-T849', '_MB300', 'A70S', 'S_ANDROID', 'A101IT', 'A70H',
             'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
-            '7', 'A956']
+            '7', 'A956', 'A955']
     WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
             'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
             'A70S', 'A101IT', '7']

From 2ea99bfdaf3076360fe6fccb3f8ad02d2fd42680 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 11:37:52 -0600
Subject: [PATCH 04/15] Content server: Add workaround for Internet Explorer
 not supporting the &apos; entity. Fixes #9413 (Internet Explorer 8,
 Apostrophes in Book Titles in http://myhostname:8080/browse become &apos;)

---
 src/calibre/library/server/browse.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/calibre/library/server/browse.py b/src/calibre/library/server/browse.py
index 97bfc30f14..fd015f5848 100644
--- a/src/calibre/library/server/browse.py
+++ b/src/calibre/library/server/browse.py
@@ -12,7 +12,7 @@ import cherrypy
 
 from calibre.constants import filesystem_encoding
 from calibre import isbytestring, force_unicode, fit_image, \
-        prepare_string_for_xml as xml
+        prepare_string_for_xml
 from calibre.utils.ordered_dict import OrderedDict
 from calibre.utils.filenames import ascii_filename
 from calibre.utils.config import prefs, tweaks
@@ -23,6 +23,10 @@ from calibre.library.server import custom_fields_to_display
 from calibre.library.field_metadata import category_icon_map
 from calibre.library.server.utils import quote, unquote
 
+def xml(*args, **kwargs):
+    ans = prepare_string_for_xml(*args, **kwargs)
+    return ans.replace('&apos;', '&#39;')
+
 def render_book_list(ids, prefix, suffix=''): # {{{
     pages = []
     num = len(ids)

From 487e28f8697f04084086fc734fc43efef9428da4 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 11:53:25 -0600
Subject: [PATCH 05/15] Fix #9419 (Enable CTRL-F keyboard shortcut in ebook
 viewer)

---
 src/calibre/gui2/viewer/main.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py
index 964616ab48..13e7066806 100644
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@@ -225,6 +225,12 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         self.action_quit.setShortcuts(qs)
         self.connect(self.action_quit, SIGNAL('triggered(bool)'),
                      lambda x:QApplication.instance().quit())
+        self.action_focus_search = QAction(self)
+        self.addAction(self.action_focus_search)
+        self.action_focus_search.setShortcuts([Qt.Key_Slash,
+            QKeySequence(QKeySequence.Find)])
+        self.action_focus_search.triggered.connect(lambda x:
+                self.search.setFocus(Qt.OtherFocusReason))
         self.action_copy.setDisabled(True)
         self.action_metadata.setCheckable(True)
         self.action_metadata.setShortcut(Qt.CTRL+Qt.Key_I)
@@ -494,12 +500,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         if self.view.search(text, backwards=backwards):
             self.scrolled(self.view.scroll_fraction)
 
-    def keyPressEvent(self, event):
-        if event.key() == Qt.Key_Slash:
-            self.search.setFocus(Qt.OtherFocusReason)
-        else:
-            return MainWindow.keyPressEvent(self, event)
-
     def internal_link_clicked(self, frac):
         self.history.add(self.pos.value())
 

From 412fbc6fbc9bceda3c35843fcf0cfe311eccad97 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 12:02:01 -0600
Subject: [PATCH 06/15] ...

---
 src/calibre/gui2/dialogs/scheduler.ui | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/calibre/gui2/dialogs/scheduler.ui b/src/calibre/gui2/dialogs/scheduler.ui
index f295703b33..f26bfc7285 100644
--- a/src/calibre/gui2/dialogs/scheduler.ui
+++ b/src/calibre/gui2/dialogs/scheduler.ui
@@ -6,7 +6,7 @@
    <rect>
     <x>0</x>
     <y>0</y>
-    <width>767</width>
+    <width>792</width>
     <height>575</height>
    </rect>
   </property>
@@ -44,7 +44,7 @@
        <rect>
         <x>0</x>
         <y>0</y>
-        <width>469</width>
+        <width>486</width>
         <height>504</height>
        </rect>
       </property>

From be945ddda012020ed896391cc28bc4362c782e93 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 12:07:41 -0600
Subject: [PATCH 07/15] Fix #9405 (Clearing last viewed book(s) history)

---
 src/calibre/gui2/viewer/main.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/calibre/gui2/viewer/main.py b/src/calibre/gui2/viewer/main.py
index 13e7066806..c704b98dc9 100644
--- a/src/calibre/gui2/viewer/main.py
+++ b/src/calibre/gui2/viewer/main.py
@@ -299,6 +299,9 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
         ca.setShortcut(QKeySequence.Copy)
         self.addAction(ca)
         self.open_history_menu = QMenu()
+        self.clear_recent_history_action = QAction(
+                _('Clear list of recently opened books'), self)
+        self.clear_recent_history_action.triggered.connect(self.clear_recent_history)
         self.build_recent_menu()
         self.action_open_ebook.setMenu(self.open_history_menu)
         self.open_history_menu.triggered[QAction].connect(self.open_recent)
@@ -307,11 +310,19 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
 
         self.restore_state()
 
+    def clear_recent_history(self, *args):
+        vprefs.set('viewer_open_history', [])
+        self.build_recent_menu()
+
     def build_recent_menu(self):
         m = self.open_history_menu
         m.clear()
+        recent = vprefs.get('viewer_open_history', [])
+        if recent:
+            m.addAction(self.clear_recent_history_action)
+            m.addSeparator()
         count = 0
-        for path in vprefs.get('viewer_open_history', []):
+        for path in recent:
             if count > 9:
                 break
             if os.path.exists(path):

From d05f8b79a7fe84b484c76801ad20acd0227015bf Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 13:08:05 -0600
Subject: [PATCH 08/15] Updated Sports Illustrated

---
 resources/recipes/sportsillustrated.recipe | 88 ++++++++++------------
 1 file changed, 38 insertions(+), 50 deletions(-)

diff --git a/resources/recipes/sportsillustrated.recipe b/resources/recipes/sportsillustrated.recipe
index f5a7b4c32b..bec63f74ef 100644
--- a/resources/recipes/sportsillustrated.recipe
+++ b/resources/recipes/sportsillustrated.recipe
@@ -1,6 +1,7 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
 #from calibre.ebooks.BeautifulSoup import BeautifulSoup
 from urllib import quote
+import re
 
 class SportsIllustratedRecipe(BasicNewsRecipe) :
     __author__  = 'kwetal'
@@ -15,65 +16,52 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
     remove_javascript = True
     use_embedded_content   = False
 
-    INDEX = 'http://sportsillustrated.cnn.com/'
+    INDEX = 'http://sportsillustrated.cnn.com/vault/cover/home/index.htm'
 
     def parse_index(self):
         answer = []
         soup = self.index_to_soup(self.INDEX)
-        # Find the link to the current issue on the front page. SI Cover
-        cover = soup.find('img', attrs = {'alt' : 'Read All Articles', 'style' : 'vertical-align:bottom;'})
-        if cover:
-            currentIssue = cover.parent['href']
-            if currentIssue:
-                # Open the index of current issue
 
-                index = self.index_to_soup(currentIssue)
-                self.log('\tLooking for current issue in: ' + currentIssue)
-                # Now let us see if they updated their frontpage
-                nav = index.find('div', attrs = {'class': 'siv_trav_top'})
-                if nav:
-                    img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_next_v2.jpg'})
-                    if img:
-                        parent = img.parent
-                        if parent.name == 'a':
-                            # They didn't update their frontpage; Load the next issue from here
-                            href = self.INDEX + parent['href']
-                            index = self.index_to_soup(href)
-                            self.log('\tLooking for current issue in: ' + href)
+        #Loop through all of the "latest" covers until we find one that actually has articles
+        for item in soup.findAll('div', attrs={'id': re.compile("ecomthumb_latest_*")}):
+            regex = re.compile('ecomthumb_latest_(\d*)')
+            result = regex.search(str(item))
+            current_issue_number = str(result.group(1))
+            current_issue_link = 'http://sportsillustrated.cnn.com/vault/cover/toc/' + current_issue_number + '/index.htm'
+            self.log('Checking this link for a TOC:  ', current_issue_link)
 
+            index = self.index_to_soup(current_issue_link)
+            if index:
                 if index.find('div', 'siv_noArticleMessage'):
-                    nav = index.find('div', attrs = {'class': 'siv_trav_top'})
-                    if nav:
-                    # Their frontpage points to an issue without any articles; Use the previous issue
-                        img = nav.find('img', attrs = {'src': 'http://i.cdn.turner.com/sivault/.element/img/1.0/btn_previous_v2.jpg'})
-                        if img:
-                            parent = img.parent
-                            if parent.name == 'a':
-                                href = self.INDEX + parent['href']
-                                index = self.index_to_soup(href)
-                                self.log('\tLooking for current issue in: ' + href)
+                    self.log('No TOC for this one.  Skipping...')
+                else:
+                    self.log('Found a TOC...  Using this link')
+                    break
 
+        # Find all articles.
+        list = index.find('div', attrs = {'class' : 'siv_artList'})
+        if list:
+            self.log ('found siv_artList')
+            articles = []
+            # Get all the articles ready for calibre.
+            counter = 0
+            for headline in list.findAll('div', attrs = {'class' : 'headline'}):
+                counter = counter + 1
+                title = self.tag_to_string(headline.a) + '\n' + self.tag_to_string(headline.findNextSibling('div', attrs = {'class' : 'info'}))
+                url = self.INDEX + headline.a['href']
+                description = self.tag_to_string(headline.findNextSibling('a').div)
+                article = {'title' : title, 'date' : u'', 'url'  : url, 'description' : description}
+                articles.append(article)
+                if counter > 5:
+                    break
 
-                # Find all articles.
-                list = index.find('div', attrs = {'class' : 'siv_artList'})
-                if list:
-                    articles = []
-                    # Get all the artcles ready for calibre.
-                    for headline in list.findAll('div', attrs = {'class' : 'headline'}):
-                        title = self.tag_to_string(headline.a) + '\n' + self.tag_to_string(headline.findNextSibling('div', attrs = {'class' : 'info'}))
-                        url = self.INDEX + headline.a['href']
-                        description = self.tag_to_string(headline.findNextSibling('a').div)
-                        article = {'title' : title, 'date' : u'', 'url'  : url, 'description' : description}
+            # See if we can find a meaningful title
+            feedTitle = 'Current Issue'
+            hasTitle = index.find('div', attrs = {'class' : 'siv_imageText_head'})
+            if hasTitle :
+                feedTitle = self.tag_to_string(hasTitle.h1)
 
-                        articles.append(article)
-
-                    # See if we can find a meaningfull title
-                    feedTitle = 'Current Issue'
-                    hasTitle = index.find('div', attrs = {'class' : 'siv_imageText_head'})
-                    if hasTitle :
-                        feedTitle = self.tag_to_string(hasTitle.h1)
-
-                    answer.append([feedTitle, articles])
+            answer.append([feedTitle, articles])
 
         return answer
 
@@ -82,6 +70,7 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
         # This is the url and the parameters that work to get the print version.
         printUrl = 'http://si.printthis.clickability.com/pt/printThis?clickMap=printThis'
         printUrl += '&fb=Y&partnerID=2356&url=' + quote(url)
+        self.log('PrintURL: ' , printUrl)
 
         return printUrl
 
@@ -116,4 +105,3 @@ class SportsIllustratedRecipe(BasicNewsRecipe) :
 
         return homeMadeSoup
         '''
-

From 0fbf30487e9b1b174c704d6c8a2651ddfade3318 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 16:05:30 -0600
Subject: [PATCH 09/15] Print out identifiers when printing a Metadata object

---
 src/calibre/ebooks/metadata/book/base.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py
index c5a8a82db1..6818126699 100644
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@@ -651,6 +651,9 @@ class Metadata(object):
             fmt('Published', isoformat(self.pubdate))
         if self.rights is not None:
             fmt('Rights', unicode(self.rights))
+        if self.identifiers:
+            fmt('Identifiers', u', '.join(['%s:%s'%(k, v) for k, v in
+                self.identifiers.iteritems()]))
         for key in self.custom_field_keys():
             val = self.get(key, None)
             if val:

From 68f63e807a8b60a5c582b78707779c1d01b3cec2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 16:07:09 -0600
Subject: [PATCH 10/15] Fix ratings not being downloaded from Amazon

---
 src/calibre/ebooks/metadata/amazon.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/calibre/ebooks/metadata/amazon.py b/src/calibre/ebooks/metadata/amazon.py
index 91e6919837..8e4dd1dd27 100644
--- a/src/calibre/ebooks/metadata/amazon.py
+++ b/src/calibre/ebooks/metadata/amazon.py
@@ -154,17 +154,16 @@ def get_metadata(br, asin, mi):
         return False
     if root.xpath('//*[@id="errorMessage"]'):
         return False
-    ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
+
+    ratings = root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]')
+    pat = re.compile(r'([0-9.]+) out of (\d+) stars')
     if ratings:
-        pat = re.compile(r'([0-9.]+) out of (\d+) stars')
-        r = ratings[0]
-        for elem in r.xpath('descendant::*[@title]'):
-            t = elem.get('title')
+        for elem in ratings[0].xpath('descendant::*[@title]'):
+            t = elem.get('title').strip()
             m = pat.match(t)
             if m is not None:
                 try:
                     mi.rating = float(m.group(1))/float(m.group(2)) * 5
-                    break
                 except:
                     pass
 
@@ -216,6 +215,7 @@ def main(args=sys.argv):
             print 'Failed to downlaod social metadata for', title
             return 1
         #print '\n\n', time.time() - st, '\n\n'
+        print mi
         print '\n'
 
     return 0

From abfab2fce09d0893407187085ffd8d1cc1cc53c1 Mon Sep 17 00:00:00 2001
From: John Schember <john@nachtimwald.com>
Date: Wed, 16 Mar 2011 19:38:51 -0400
Subject: [PATCH 11/15] TXT Input: Textile, escaped styling submitted by Perkin

---
 src/calibre/ebooks/textile/functions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/textile/functions.py b/src/calibre/ebooks/textile/functions.py
index eca4bcecff..ec675b9b62 100644
--- a/src/calibre/ebooks/textile/functions.py
+++ b/src/calibre/ebooks/textile/functions.py
@@ -807,7 +807,7 @@ class Textile(object):
 
         for qtag in qtags:
             pattern = re.compile(r"""
-                (?:^|(?<=[\s>%(pnct)s])|([\]}]))
+                (?:^|(?<=[\s>%(pnct)s])|\[|([\]}]))
                 (%(qtag)s)(?!%(qtag)s)
                 (%(c)s)
                 (?::(\S+))?

From cc14a6a657072491cd0745220c6ccf95b7e505a6 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 17:42:42 -0600
Subject: [PATCH 12/15] ...

---
 src/calibre/ebooks/metadata/book/base.py      |   7 +-
 src/calibre/ebooks/metadata/sources/amazon.py | 156 +++++++++++++-----
 src/calibre/ebooks/metadata/sources/base.py   |   9 +
 src/calibre/ebooks/metadata/sources/google.py |   5 +-
 4 files changed, 134 insertions(+), 43 deletions(-)

diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py
index 6818126699..db0c278340 100644
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@@ -633,10 +633,6 @@ class Metadata(object):
             fmt('Publisher', self.publisher)
         if getattr(self, 'book_producer', False):
             fmt('Book Producer', self.book_producer)
-        if self.comments:
-            fmt('Comments', self.comments)
-        if self.isbn:
-            fmt('ISBN', self.isbn)
         if self.tags:
             fmt('Tags', u', '.join([unicode(t) for t in self.tags]))
         if self.series:
@@ -654,6 +650,9 @@ class Metadata(object):
         if self.identifiers:
             fmt('Identifiers', u', '.join(['%s:%s'%(k, v) for k, v in
                 self.identifiers.iteritems()]))
+        if self.comments:
+            fmt('Comments', self.comments)
+
         for key in self.custom_field_keys():
             val = self.get(key, None)
             if val:
diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 33ea24c421..30b95950ea 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -20,6 +20,7 @@ from calibre.utils.cleantext import clean_ascii_chars
 from calibre.ebooks.chardet import xml_to_unicode
 from calibre.ebooks.metadata.book.base import Metadata
 from calibre.library.comments import sanitize_comments_html
+from calibre.utils.date import parse_date
 
 class Worker(Thread):
 
@@ -28,10 +29,12 @@ class Worker(Thread):
     '''
 
     def __init__(self, url, result_queue, browser, log, timeout=20):
+        Thread.__init__(self)
+        self.daemon = True
         self.url, self.result_queue = url, result_queue
         self.log, self.timeout = log, timeout
         self.browser = browser.clone_browser()
-        self.cover_url = self.amazon_id = None
+        self.cover_url = self.amazon_id = self.isbn = None
 
     def run(self):
         try:
@@ -111,7 +114,7 @@ class Worker(Thread):
         self.amazon_id = asin
 
         try:
-            mi.rating = self.parse_ratings(root)
+            mi.rating = self.parse_rating(root)
         except:
             self.log.exception('Error parsing ratings for url: %r'%self.url)
 
@@ -125,6 +128,37 @@ class Worker(Thread):
         except:
             self.log.exception('Error parsing cover for url: %r'%self.url)
 
+        pd = root.xpath('//h2[text()="Product Details"]/../div[@class="content"]')
+        if pd:
+            pd = pd[0]
+
+            try:
+                isbn = self.parse_isbn(pd)
+                if isbn:
+                    self.isbn = mi.isbn = isbn
+            except:
+                self.log.exception('Error parsing ISBN for url: %r'%self.url)
+
+            try:
+                mi.publisher = self.parse_publisher(pd)
+            except:
+                self.log.exception('Error parsing publisher for url: %r'%self.url)
+
+            try:
+                mi.pubdate = self.parse_pubdate(pd)
+            except:
+                self.log.exception('Error parsing publish date for url: %r'%self.url)
+
+            try:
+                lang = self.parse_language(pd)
+                if lang:
+                    mi.language = lang
+            except:
+                self.log.exception('Error parsing language for url: %r'%self.url)
+
+        else:
+            self.log.warning('Failed to find product description for url: %r'%self.url)
+
         self.result_queue.put(mi)
 
     def parse_asin(self, root):
@@ -140,27 +174,23 @@ class Worker(Thread):
                     method='text').strip()
         else:
             title = tostring(tdiv, encoding=unicode, method='text').strip()
-        return re.sub(r'[([].*[)]]', '', title).strip()
+        return re.sub(r'[(\[].*[)\]]', '', title).strip()
 
     def parse_authors(self, root):
-        bdiv = root.xpath('//div[@class="buying"]')[0]
-        aname = bdiv.xpath('descendant::span[@class="contributorNameTrigger"]')
+        aname = root.xpath('//span[@class="contributorNameTrigger"]')
         authors = [tostring(x, encoding=unicode, method='text').strip() for x
                 in aname]
         return authors
 
-    def parse_ratings(self, root):
-        ratings = root.xpath('//form[@id="handleBuy"]/descendant::*[@class="asinReviewsSummary"]')
+    def parse_rating(self, root):
+        ratings = root.xpath('//div[@class="jumpBar"]/descendant::span[@class="asinReviewsSummary"]')
         pat = re.compile(r'([0-9.]+) out of (\d+) stars')
         if ratings:
             for elem in ratings[0].xpath('descendant::*[@title]'):
-                t = elem.get('title')
+                t = elem.get('title').strip()
                 m = pat.match(t)
                 if m is not None:
-                    try:
-                        return float(m.group(1))/float(m.group(2)) * 5
-                    except:
-                        pass
+                    return float(m.group(1))/float(m.group(2)) * 5
 
     def parse_comments(self, root):
         desc = root.xpath('//div[@id="productDescription"]/*[@class="content"]')
@@ -193,6 +223,37 @@ class Worker(Thread):
                     bn = sparts[0] + sparts[-1]
                     return ('/'.join(parts[:-1]))+'/'+bn
 
+    def parse_isbn(self, pd):
+        for x in reversed(pd.xpath(
+            'descendant::*[starts-with(text(), "ISBN")]')):
+            if x.tail:
+                ans = check_isbn(x.tail.strip())
+                if ans:
+                    return ans
+
+    def parse_publisher(self, pd):
+        for x in reversed(pd.xpath(
+            'descendant::*[starts-with(text(), "Publisher:")]')):
+            if x.tail:
+                ans = x.tail.partition(';')[0]
+                return ans.partition('(')[0].strip()
+
+    def parse_pubdate(self, pd):
+        for x in reversed(pd.xpath(
+            'descendant::*[starts-with(text(), "Publisher:")]')):
+            if x.tail:
+                ans = x.tail
+                date = ans.partition('(')[-1].replace(')', '').strip()
+                return parse_date(date, assume_utc=True)
+
+    def parse_language(self, pd):
+        for x in reversed(pd.xpath(
+            'descendant::*[starts-with(text(), "Language:")]')):
+            if x.tail:
+                ans = x.tail.strip()
+                if ans == 'English':
+                    return 'en'
+
 
 class Amazon(Source):
 
@@ -200,7 +261,8 @@ class Amazon(Source):
     description = _('Downloads metadata from Amazon')
 
     capabilities = frozenset(['identify'])
-    touched_fields = frozenset(['title', 'authors', 'isbn', 'pubdate', 'comments'])
+    touched_fields = frozenset(['title', 'authors', 'identifier:amazon',
+        'identifier:isbn', 'rating', 'comments', 'publisher', 'pubdate'])
 
     AMAZON_DOMAINS = {
             'com': _('US'),
@@ -254,6 +316,10 @@ class Amazon(Source):
 
     def identify(self, log, result_queue, abort, title=None, authors=None,
             identifiers={}, timeout=20):
+        '''
+        Note this method will retry without identifiers automatically if no
+        match is found with identifiers.
+        '''
         query = self.create_query(log, title=title, authors=authors,
                 identifiers=identifiers)
         if query is None:
@@ -281,37 +347,45 @@ class Amazon(Source):
         raw = xml_to_unicode(raw, strip_encoding_pats=True,
                 resolve_entities=True)[0]
 
-        if '<title>404 - ' in raw:
-            log.error('No matches found for query: %r'%query)
-            return
-
-        try:
-            root = soupparser.fromstring(clean_ascii_chars(raw))
-        except:
-            msg = 'Failed to parse amazon page for query: %r'%query
-            log.exception(msg)
-            return msg
-
-        errmsg = root.xpath('//*[@id="errorMessage"]')
-        if errmsg:
-            msg = tostring(errmsg, method='text', encoding=unicode).strip()
-            log.error(msg)
-            # The error is almost always a not found error
-            return
-
         matches = []
-        for div in root.xpath(r'//div[starts-with(@id, "result_")]'):
-            for a in div.xpath(r'descendant::a[@class="title" and @href]'):
-                title = tostring(a, method='text', encoding=unicode).lower()
-                if 'bulk pack' not in title:
-                    matches.append(a.get('href'))
-                break
+        found = '<title>404 - ' not in raw
+
+        if found:
+            try:
+                root = soupparser.fromstring(clean_ascii_chars(raw))
+            except:
+                msg = 'Failed to parse amazon page for query: %r'%query
+                log.exception(msg)
+                return msg
+
+                errmsg = root.xpath('//*[@id="errorMessage"]')
+                if errmsg:
+                    msg = tostring(errmsg, method='text', encoding=unicode).strip()
+                    log.error(msg)
+                    # The error is almost always a not found error
+                    found = False
+
+        if found:
+            for div in root.xpath(r'//div[starts-with(@id, "result_")]'):
+                for a in div.xpath(r'descendant::a[@class="title" and @href]'):
+                    title = tostring(a, method='text', encoding=unicode).lower()
+                    if 'bulk pack' not in title:
+                        matches.append(a.get('href'))
+                    break
 
         # Keep only the top 5 matches as the matches are sorted by relevance by
         # Amazon so lower matches are not likely to be very relevant
         matches = matches[:5]
 
+        if abort.is_set():
+            return
+
         if not matches:
+            if identifiers and title and authors:
+                self.log('No matches found with identifiers, retrying using only'
+                        ' title and authors')
+                return self.identify(log, result_queue, abort, title=title,
+                        authors=authors, timeout=timeout)
             log.error('No matches found with query: %r'%query)
             return
 
@@ -333,6 +407,14 @@ class Amazon(Source):
             if not a_worker_is_alive:
                 break
 
+        for w in workers:
+            if w.amazon_id:
+                if w.isbn:
+                    self.cache_isbn_to_identifier(w.isbn, w.amazon_id)
+                if w.cover_url:
+                    self.cache_identifier_to_cover_url(w.amazon_id,
+                            w.cover_url)
+
         return None
 
 
diff --git a/src/calibre/ebooks/metadata/sources/base.py b/src/calibre/ebooks/metadata/sources/base.py
index 523d012cd5..3c320d14b6 100644
--- a/src/calibre/ebooks/metadata/sources/base.py
+++ b/src/calibre/ebooks/metadata/sources/base.py
@@ -35,6 +35,7 @@ class Source(Plugin):
     def __init__(self, *args, **kwargs):
         Plugin.__init__(self, *args, **kwargs)
         self._isbn_to_identifier_cache = {}
+        self._identifier_to_cover_url_cache = {}
         self.cache_lock = threading.RLock()
         self._config_obj = None
         self._browser = None
@@ -68,6 +69,14 @@ class Source(Plugin):
         with self.cache_lock:
             return self._isbn_to_identifier_cache.get(isbn, None)
 
+    def cache_identifier_to_cover_url(self, id_, url):
+        with self.cache_lock:
+            self._identifier_to_cover_url_cache[id_] = url
+
+    def cached_identifier_to_cover_url(self, id_):
+        with self.cache_lock:
+            return self._identifier_to_cover_url_cache.get(id_, None)
+
     def get_author_tokens(self, authors, only_first_author=True):
         '''
         Take a list of authors and return a list of tokens useful for an
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 923062379e..8a7fc8e540 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -145,8 +145,9 @@ class GoogleBooks(Source):
     description = _('Downloads metadata from Google Books')
 
     capabilities = frozenset(['identify'])
-    touched_fields = frozenset(['title', 'authors', 'isbn', 'tags', 'pubdate',
-        'comments', 'publisher', 'author_sort']) # language currently disabled
+    touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
+        'comments', 'publisher', 'author_sort', 'identifier:isbn',
+        'identifier:google']) # language currently disabled
 
     def create_query(self, log, title=None, authors=None, identifiers={}):
         BASE_URL = 'http://books.google.com/books/feeds/volumes?'

From c53f66f7521f25f27493baed3200919a0da8f4d7 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 22:00:45 -0600
Subject: [PATCH 13/15] New Google and Amazon metadata plugins finished

---
 src/calibre/ebooks/metadata/book/base.py      |  5 ++++
 src/calibre/ebooks/metadata/sources/amazon.py | 10 ++++----
 src/calibre/ebooks/metadata/sources/google.py | 23 ++++++++-----------
 src/calibre/ebooks/metadata/sources/test.py   | 10 ++++++++
 4 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/src/calibre/ebooks/metadata/book/base.py b/src/calibre/ebooks/metadata/book/base.py
index db0c278340..2bf23e4b82 100644
--- a/src/calibre/ebooks/metadata/book/base.py
+++ b/src/calibre/ebooks/metadata/book/base.py
@@ -227,6 +227,11 @@ class Metadata(object):
         if val:
             identifiers[typ] = val
 
+    def has_identifier(self, typ):
+        identifiers = object.__getattribute__(self,
+            '_data')['identifiers']
+        return typ in identifiers
+
     # field-oriented interface. Intended to be the same as in LibraryDatabase
 
     def standard_field_keys(self):
diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 30b95950ea..e8b7bf2e2c 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -22,7 +22,7 @@ from calibre.ebooks.metadata.book.base import Metadata
 from calibre.library.comments import sanitize_comments_html
 from calibre.utils.date import parse_date
 
-class Worker(Thread):
+class Worker(Thread): # {{{
 
     '''
     Get book details from amazons book page in a separate thread
@@ -253,7 +253,7 @@ class Worker(Thread):
                 ans = x.tail.strip()
                 if ans == 'English':
                     return 'en'
-
+# }}}
 
 class Amazon(Source):
 
@@ -270,7 +270,7 @@ class Amazon(Source):
             'de' : _('Germany'),
     }
 
-    def create_query(self, log, title=None, authors=None, identifiers={}):
+    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
         domain = self.prefs.get('domain', 'com')
 
         # See the amazon detailed search page to get all options
@@ -313,8 +313,9 @@ class Amazon(Source):
         url = 'http://www.amazon.%s/s/?'%domain + urlencode(utf8q)
         return url
 
+    # }}}
 
-    def identify(self, log, result_queue, abort, title=None, authors=None,
+    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
             identifiers={}, timeout=20):
         '''
         Note this method will retry without identifiers automatically if no
@@ -416,6 +417,7 @@ class Amazon(Source):
                             w.cover_url)
 
         return None
+    # }}}
 
 
 if __name__ == '__main__':
diff --git a/src/calibre/ebooks/metadata/sources/google.py b/src/calibre/ebooks/metadata/sources/google.py
index 8a7fc8e540..8dffd3f053 100644
--- a/src/calibre/ebooks/metadata/sources/google.py
+++ b/src/calibre/ebooks/metadata/sources/google.py
@@ -42,7 +42,7 @@ subject        = XPath('descendant::dc:subject')
 description    = XPath('descendant::dc:description')
 language       = XPath('descendant::dc:language')
 
-def get_details(browser, url, timeout):
+def get_details(browser, url, timeout): # {{{
     try:
         raw = browser.open_novisit(url, timeout=timeout).read()
     except Exception as e:
@@ -54,8 +54,9 @@ def get_details(browser, url, timeout):
         raw = browser.open_novisit(url, timeout=timeout).read()
 
     return raw
+# }}}
 
-def to_metadata(browser, log, entry_, timeout):
+def to_metadata(browser, log, entry_, timeout): # {{{
 
     def get_text(extra, x):
         try:
@@ -94,12 +95,6 @@ def to_metadata(browser, log, entry_, timeout):
     #mi.language = get_text(extra, language)
     mi.publisher = get_text(extra, publisher)
 
-    # Author sort
-    for x in creator(extra):
-        for key, val in x.attrib.items():
-            if key.endswith('file-as') and val and val.strip():
-                mi.author_sort = val
-                break
     # ISBN
     isbns = []
     for x in identifier(extra):
@@ -137,7 +132,7 @@ def to_metadata(browser, log, entry_, timeout):
 
 
     return mi
-
+# }}}
 
 class GoogleBooks(Source):
 
@@ -146,10 +141,10 @@ class GoogleBooks(Source):
 
     capabilities = frozenset(['identify'])
     touched_fields = frozenset(['title', 'authors', 'tags', 'pubdate',
-        'comments', 'publisher', 'author_sort', 'identifier:isbn',
+        'comments', 'publisher', 'identifier:isbn',
         'identifier:google']) # language currently disabled
 
-    def create_query(self, log, title=None, authors=None, identifiers={}):
+    def create_query(self, log, title=None, authors=None, identifiers={}): # {{{
         BASE_URL = 'http://books.google.com/books/feeds/volumes?'
         isbn = check_isbn(identifiers.get('isbn', None))
         q = ''
@@ -177,6 +172,7 @@ class GoogleBooks(Source):
             'start-index':1,
             'min-viewability':'none',
             })
+    # }}}
 
     def cover_url_from_identifiers(self, identifiers):
         goog = identifiers.get('google', None)
@@ -209,11 +205,11 @@ class GoogleBooks(Source):
             if abort.is_set():
                 break
 
-    def identify(self, log, result_queue, abort, title=None, authors=None,
+    def identify(self, log, result_queue, abort, title=None, authors=None, # {{{
             identifiers={}, timeout=20):
         query = self.create_query(log, title=title, authors=authors,
                 identifiers=identifiers)
-        br = self.browser()
+        br = self.browser
         try:
             raw = br.open_novisit(query, timeout=timeout).read()
         except Exception, e:
@@ -234,6 +230,7 @@ class GoogleBooks(Source):
         self.get_all_details(br, log, entries, abort, result_queue, timeout)
 
         return None
+    # }}}
 
 if __name__ == '__main__':
     # To run these test use: calibre-debug -e src/calibre/ebooks/metadata/sources/google.py
diff --git a/src/calibre/ebooks/metadata/sources/test.py b/src/calibre/ebooks/metadata/sources/test.py
index 3b41e69d40..e877eabd83 100644
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@@ -102,6 +102,16 @@ def test_identify_plugin(name, tests):
             prints('Log saved to', lf)
             raise SystemExit(1)
 
+    for key in plugin.touched_fields:
+        if key.startswith('identifier:'):
+            key = key.partition(':')[-1]
+            if not match_found.has_identifier(key):
+                prints('Failed to find identifier:', key)
+                raise SystemExit(1)
+        elif match_found.is_null(key):
+            prints('Failed to find', key)
+            raise SystemExit(1)
+
     prints('Average time per query', sum(times)/len(times))
 
     if os.stat(lf).st_size > 10:

From 2aee9ec3217c57df2540fe3d6ec18710155bbd57 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 22:58:31 -0600
Subject: [PATCH 14/15] Amazon plugin: Use the log argument when retrying
 without identifiers and add more identify tests

---
 src/calibre/ebooks/metadata/sources/amazon.py | 23 +++++++++++++++++--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index e8b7bf2e2c..2e3180a739 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -383,7 +383,7 @@ class Amazon(Source):
 
         if not matches:
             if identifiers and title and authors:
-                self.log('No matches found with identifiers, retrying using only'
+                log('No matches found with identifiers, retrying using only'
                         ' title and authors')
                 return self.identify(log, result_queue, abort, title=title,
                         authors=authors, timeout=timeout)
@@ -428,10 +428,29 @@ if __name__ == '__main__':
     test_identify_plugin(Amazon.name,
         [
 
-            (
+            (  # This isbn not on amazon
+                {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
+                    'authors':['Lutz']},
+                [title_test('Learning Python: Powerful Object-Oriented Programming', exact=True)]
+
+            ),
+
+            ( # Sophisticated comment formatting
+                {'identifiers':{'isbn': '9781416580829'}},
+                [title_test('Angels & Demons - Movie Tie-In: A Novel', exact=True)]
+            ),
+
+            ( # No specific problems
                 {'identifiers':{'isbn': '0743273567'}},
                 [title_test('The great gatsby', exact=True)]
             ),
+
+            (  # A newer book
+                {'identifiers':{'isbn': '9780316044981'}},
+                [title_test('The Heroes', exact=True)]
+
+            ),
+
         ])
 
 

From 49e4f2cf28c09625a6ebe6476199d62958398637 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 16 Mar 2011 23:44:48 -0600
Subject: [PATCH 15/15] Amazon plugin: Handle the case of authors unknown to
 Amazon and don't set the cover URL to the "no cover available" image

---
 src/calibre/ebooks/metadata/sources/amazon.py | 46 +++++++++++++------
 src/calibre/ebooks/metadata/sources/test.py   |  9 ++++
 2 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py
index 2e3180a739..335a43ebb0 100644
--- a/src/calibre/ebooks/metadata/sources/amazon.py
+++ b/src/calibre/ebooks/metadata/sources/amazon.py
@@ -62,6 +62,7 @@ class Worker(Thread): # {{{
 
         raw = xml_to_unicode(raw, strip_encoding_pats=True,
                 resolve_entities=True)[0]
+        # open('/t/t.html', 'wb').write(raw)
 
         if '<title>404 - ' in raw:
             self.log.error('URL malformed: %r'%self.url)
@@ -127,6 +128,7 @@ class Worker(Thread): # {{{
             self.cover_url = self.parse_cover(root)
         except:
             self.log.exception('Error parsing cover for url: %r'%self.url)
+        mi.has_cover = bool(self.cover_url)
 
         pd = root.xpath('//h2[text()="Product Details"]/../div[@class="content"]')
         if pd:
@@ -177,7 +179,10 @@ class Worker(Thread): # {{{
         return re.sub(r'[(\[].*[)\]]', '', title).strip()
 
     def parse_authors(self, root):
-        aname = root.xpath('//span[@class="contributorNameTrigger"]')
+        x = '//h1[@class="parseasinTitle"]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]'
+        aname = root.xpath(x)
+        for x in aname:
+            x.tail = ''
         authors = [tostring(x, encoding=unicode, method='text').strip() for x
                 in aname]
         return authors
@@ -215,13 +220,14 @@ class Worker(Thread): # {{{
         imgs = root.xpath('//img[@id="prodImage" and @src]')
         if imgs:
             src = imgs[0].get('src')
-            parts = src.split('/')
-            if len(parts) > 3:
-                bn = parts[-1]
-                sparts = bn.split('_')
-                if len(sparts) > 2:
-                    bn = sparts[0] + sparts[-1]
-                    return ('/'.join(parts[:-1]))+'/'+bn
+            if '/no-image-avail' not in src:
+                parts = src.split('/')
+                if len(parts) > 3:
+                    bn = parts[-1]
+                    sparts = bn.split('_')
+                    if len(sparts) > 2:
+                        bn = sparts[0] + sparts[-1]
+                        return ('/'.join(parts[:-1]))+'/'+bn
 
     def parse_isbn(self, pd):
         for x in reversed(pd.xpath(
@@ -424,30 +430,44 @@ if __name__ == '__main__':
     # To run these test use: calibre-debug -e
     # src/calibre/ebooks/metadata/sources/amazon.py
     from calibre.ebooks.metadata.sources.test import (test_identify_plugin,
-            title_test)
+            title_test, authors_test)
     test_identify_plugin(Amazon.name,
         [
 
+            ( # An e-book ISBN not on Amazon, one of the authors is
+              # unknown to Amazon, so no popup wrapper
+                {'identifiers':{'isbn': '0307459671'},
+                    'title':'Invisible Gorilla', 'authors':['Christopher Chabris']},
+                [title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us',
+                    exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])]
+
+            ),
+
             (  # This isbn not on amazon
                 {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python',
                     'authors':['Lutz']},
-                [title_test('Learning Python: Powerful Object-Oriented Programming', exact=True)]
+                [title_test('Learning Python: Powerful Object-Oriented Programming',
+                    exact=True), authors_test(['Mark Lutz'])
+                 ]
 
             ),
 
             ( # Sophisticated comment formatting
                 {'identifiers':{'isbn': '9781416580829'}},
-                [title_test('Angels & Demons - Movie Tie-In: A Novel', exact=True)]
+                [title_test('Angels & Demons - Movie Tie-In: A Novel',
+                    exact=True), authors_test(['Dan Brown'])]
             ),
 
             ( # No specific problems
                 {'identifiers':{'isbn': '0743273567'}},
-                [title_test('The great gatsby', exact=True)]
+                [title_test('The great gatsby', exact=True),
+                    authors_test(['F. Scott Fitzgerald'])]
             ),
 
             (  # A newer book
                 {'identifiers':{'isbn': '9780316044981'}},
-                [title_test('The Heroes', exact=True)]
+                [title_test('The Heroes', exact=True),
+                    authors_test(['Joe Abercrombie'])]
 
             ),
 
diff --git a/src/calibre/ebooks/metadata/sources/test.py b/src/calibre/ebooks/metadata/sources/test.py
index e877eabd83..69e0c32846 100644
--- a/src/calibre/ebooks/metadata/sources/test.py
+++ b/src/calibre/ebooks/metadata/sources/test.py
@@ -37,6 +37,15 @@ def title_test(title, exact=False):
 
     return test
 
+def authors_test(authors):
+    authors = set([x.lower() for x in authors])
+
+    def test(mi):
+        au = set([x.lower() for x in mi.authors])
+        return au == authors
+
+    return test
+
 def test_identify_plugin(name, tests):
     '''
     :param name: Plugin name