diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index 2e3180a739..335a43ebb0 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -62,6 +62,7 @@ class Worker(Thread): # {{{ raw = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True)[0] + # open('/t/t.html', 'wb').write(raw) if '404 - ' in raw: self.log.error('URL malformed: %r'%self.url) @@ -127,6 +128,7 @@ class Worker(Thread): # {{{ self.cover_url = self.parse_cover(root) except: self.log.exception('Error parsing cover for url: %r'%self.url) + mi.has_cover = bool(self.cover_url) pd = root.xpath('//h2[text()="Product Details"]/../div[@class="content"]') if pd: @@ -177,7 +179,10 @@ class Worker(Thread): # {{{ return re.sub(r'[(\[].*[)\]]', '', title).strip() def parse_authors(self, root): - aname = root.xpath('//span[@class="contributorNameTrigger"]') + x = '//h1[@class="parseasinTitle"]/following-sibling::span/*[(name()="a" and @href) or (name()="span" and @class="contributorNameTrigger")]' + aname = root.xpath(x) + for x in aname: + x.tail = '' authors = [tostring(x, encoding=unicode, method='text').strip() for x in aname] return authors @@ -215,13 +220,14 @@ class Worker(Thread): # {{{ imgs = root.xpath('//img[@id="prodImage" and @src]') if imgs: src = imgs[0].get('src') - parts = src.split('/') - if len(parts) > 3: - bn = parts[-1] - sparts = bn.split('_') - if len(sparts) > 2: - bn = sparts[0] + sparts[-1] - return ('/'.join(parts[:-1]))+'/'+bn + if '/no-image-avail' not in src: + parts = src.split('/') + if len(parts) > 3: + bn = parts[-1] + sparts = bn.split('_') + if len(sparts) > 2: + bn = sparts[0] + sparts[-1] + return ('/'.join(parts[:-1]))+'/'+bn def parse_isbn(self, pd): for x in reversed(pd.xpath( @@ -424,30 +430,44 @@ if __name__ == '__main__': # To run these test use: calibre-debug -e # src/calibre/ebooks/metadata/sources/amazon.py from calibre.ebooks.metadata.sources.test import (test_identify_plugin, - title_test) + title_test, authors_test) test_identify_plugin(Amazon.name, [ + ( # An e-book ISBN not on Amazon, one of the authors is + # unknown to Amazon, so no popup wrapper + {'identifiers':{'isbn': '0307459671'}, + 'title':'Invisible Gorilla', 'authors':['Christopher Chabris']}, + [title_test('The Invisible Gorilla: And Other Ways Our Intuitions Deceive Us', + exact=True), authors_test(['Christopher Chabris', 'Daniel Simons'])] + + ), + ( # This isbn not on amazon {'identifiers':{'isbn': '8324616489'}, 'title':'Learning Python', 'authors':['Lutz']}, - [title_test('Learning Python: Powerful Object-Oriented Programming', exact=True)] + [title_test('Learning Python: Powerful Object-Oriented Programming', + exact=True), authors_test(['Mark Lutz']) + ] ), ( # Sophisticated comment formatting {'identifiers':{'isbn': '9781416580829'}}, - [title_test('Angels & Demons - Movie Tie-In: A Novel', exact=True)] + [title_test('Angels & Demons - Movie Tie-In: A Novel', + exact=True), authors_test(['Dan Brown'])] ), ( # No specific problems {'identifiers':{'isbn': '0743273567'}}, - [title_test('The great gatsby', exact=True)] + [title_test('The great gatsby', exact=True), + authors_test(['F. Scott Fitzgerald'])] ), ( # A newer book {'identifiers':{'isbn': '9780316044981'}}, - [title_test('The Heroes', exact=True)] + [title_test('The Heroes', exact=True), + authors_test(['Joe Abercrombie'])] ), diff --git a/src/calibre/ebooks/metadata/sources/test.py b/src/calibre/ebooks/metadata/sources/test.py index e877eabd83..69e0c32846 100644 --- a/src/calibre/ebooks/metadata/sources/test.py +++ b/src/calibre/ebooks/metadata/sources/test.py @@ -37,6 +37,15 @@ def title_test(title, exact=False): return test +def authors_test(authors): + authors = set([x.lower() for x in authors]) + + def test(mi): + au = set([x.lower() for x in mi.authors]) + return au == authors + + return test + def test_identify_plugin(name, tests): ''' :param name: Plugin name