Fix language/isbn/publisher/pubdate not being downloaded from Amazon because of website changes

This commit is contained in:
Kovid Goyal 2021-04-02 11:19:44 +05:30
parent 3446f3b5d3
commit 00b596afb2
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -425,15 +425,18 @@ class Worker(Thread): # Get details {{{
self.cover_url = self.cover_url_processor(self.cover_url)
mi.has_cover = bool(self.cover_url)
detail_bullets = root.xpath('//*[@data-feature-name="detailBullets"]')
non_hero = tuple(self.selector(
'div#bookDetails_container_div div#nonHeroSection'))
if non_hero:
# New style markup
if detail_bullets:
self.parse_detail_bullets(root, mi, detail_bullets[0])
elif non_hero:
try:
self.parse_new_details(root, mi, non_hero[0])
except:
self.log.exception(
'Failed to parse new-style book details section')
else:
pd = root.xpath(self.pd_xpath)
if pd:
@ -840,15 +843,25 @@ class Worker(Thread): # Get details {{{
if url:
return url
def parse_detail_bullets(self, root, mi, container):
    '''
    Parse the "detail bullets" style product details section.

    Looks up the first ``.detail-bullet-list`` element inside *container*
    and, for every ``.a-list-item`` within it that holds at least two
    direct ``<span>`` children, passes the first two spans (label, value)
    to :meth:`parse_detail_cells` together with *mi*.

    :param root: Root of the parsed page (unused here, kept for a signature
                 parallel to :meth:`parse_new_details`).
    :param mi: Metadata object forwarded to :meth:`parse_detail_cells`.
    :param container: Element that is searched for the bullet list.
    '''
    # A bare next() raises StopIteration when the list is absent. The call
    # site visible alongside this method is not wrapped in try/except, so
    # a missing list would abort the rest of the details parsing. Use a
    # default and an identity check instead (an element's truth value is
    # not a reliable "found" test).
    ul = next(self.selector('.detail-bullet-list', root=container), None)
    if ul is None:
        return
    for span in self.selector('.a-list-item', root=ul):
        cells = span.xpath('./span')
        if len(cells) >= 2:
            self.parse_detail_cells(mi, cells[0], cells[1])
def parse_new_details(self, root, mi, non_hero):
    '''
    Parse the table based ("new style") product details section.

    Takes the first ``<table>`` below *non_hero* and, for every row that
    has exactly two ``<td>`` cells, hands the (label, value) cells to
    :meth:`parse_detail_cells` together with *mi*.

    :param root: Root of the parsed page (unused here).
    :param mi: Metadata object forwarded to :meth:`parse_detail_cells`.
    :param non_hero: Element containing the details table.
    :raises IndexError: if *non_hero* contains no table.
    '''
    table = non_hero.xpath('descendant::table')[0]
    for tr in table.xpath('descendant::tr'):
        cells = tr.xpath('descendant::td')
        if len(cells) == 2:
            # Text extraction happens inside parse_detail_cells; computing
            # name/val here as well would be unused, duplicated work.
            self.parse_detail_cells(mi, cells[0], cells[1])
def parse_detail_cells(self, mi, c1, c2):
name = self.totext(c1).strip().strip(':').strip()
val = self.totext(c2)
if not val:
continue
return
if name in self.language_names:
ans = self.lang_map.get(val, None)
if not ans:
@ -925,7 +938,7 @@ class Worker(Thread): # Get details {{{
class Amazon(Source):
name = 'Amazon.com'
version = (1, 2, 16)
version = (1, 2, 17)
minimum_calibre_version = (2, 82, 0)
description = _('Downloads metadata and covers from Amazon')
@ -1568,7 +1581,7 @@ def manual_tests(domain, **kw): # {{{
( # Paperback with series
{'identifiers': {'amazon': '1423146786'}},
[title_test('The Heroes of Olympus, Book Five The Blood of Olympus',
exact=True), series_test('Heroes of Olympus', 5)]
exact=True), series_test('The Heroes of Olympus', 5)]
),
( # Kindle edition with series
@ -1604,7 +1617,7 @@ def manual_tests(domain, **kw): # {{{
( # No specific problems
{'identifiers': {'isbn': '0743273567'}},
[title_test('The great gatsby', exact=True),
authors_test(['F. Scott Fitzgerald'])]
authors_test(['Francis Scott Fitzgerald'])]
),
]