Fix language/isbn/publisher/pubdate not being downloaded from Amazon because of website changes

This commit is contained in:
Kovid Goyal 2021-04-02 11:19:44 +05:30
parent 3446f3b5d3
commit 00b596afb2
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -425,15 +425,18 @@ class Worker(Thread): # Get details {{{
self.cover_url = self.cover_url_processor(self.cover_url) self.cover_url = self.cover_url_processor(self.cover_url)
mi.has_cover = bool(self.cover_url) mi.has_cover = bool(self.cover_url)
detail_bullets = root.xpath('//*[@data-feature-name="detailBullets"]')
non_hero = tuple(self.selector( non_hero = tuple(self.selector(
'div#bookDetails_container_div div#nonHeroSection')) 'div#bookDetails_container_div div#nonHeroSection'))
if non_hero: if detail_bullets:
# New style markup self.parse_detail_bullets(root, mi, detail_bullets[0])
elif non_hero:
try: try:
self.parse_new_details(root, mi, non_hero[0]) self.parse_new_details(root, mi, non_hero[0])
except: except:
self.log.exception( self.log.exception(
'Failed to parse new-style book details section') 'Failed to parse new-style book details section')
else: else:
pd = root.xpath(self.pd_xpath) pd = root.xpath(self.pd_xpath)
if pd: if pd:
@ -840,36 +843,46 @@ class Worker(Thread): # Get details {{{
if url: if url:
return url return url
def parse_detail_bullets(self, root, mi, container):
    """Parse the "detail bullets" markup and record each name/value pair.

    Looks up the first ``.detail-bullet-list`` element inside ``container``,
    then, for every ``.a-list-item`` under it, takes the item's direct
    ``span`` children and hands the first two (name cell, value cell) to
    ``self.parse_detail_cells``.

    :param root: the page root; accepted for signature parity with the other
        detail parsers but not used here.
    :param mi: the metadata object passed through to ``parse_detail_cells``.
    :param container: the element that is searched for the bullet list.
    :return: None. If no bullet list is found the method does nothing.
    """
    # Supply a default to next(): without it a page that has the container
    # but no bullet list raises StopIteration out of this method.
    ul = next(self.selector('.detail-bullet-list', root=container), None)
    # Compare with None explicitly so that a found-but-falsy element is
    # still processed.
    if ul is None:
        return
    for span in self.selector('.a-list-item', root=ul):
        cells = span.xpath('./span')
        # Items with fewer than two span children carry no name/value pair.
        if len(cells) >= 2:
            self.parse_detail_cells(mi, cells[0], cells[1])
def parse_new_details(self, root, mi, non_hero): def parse_new_details(self, root, mi, non_hero):
table = non_hero.xpath('descendant::table')[0] table = non_hero.xpath('descendant::table')[0]
for tr in table.xpath('descendant::tr'): for tr in table.xpath('descendant::tr'):
cells = tr.xpath('descendant::td') cells = tr.xpath('descendant::td')
if len(cells) == 2: if len(cells) == 2:
name = self.totext(cells[0]) self.parse_detail_cells(mi, cells[0], cells[1])
val = self.totext(cells[1])
if not val: def parse_detail_cells(self, mi, c1, c2):
continue name = self.totext(c1).strip().strip(':').strip()
if name in self.language_names: val = self.totext(c2)
ans = self.lang_map.get(val, None) if not val:
if not ans: return
ans = canonicalize_lang(val) if name in self.language_names:
if ans: ans = self.lang_map.get(val, None)
mi.language = ans if not ans:
elif name in self.publisher_names: ans = canonicalize_lang(val)
pub = val.partition(';')[0].partition('(')[0].strip() if ans:
if pub: mi.language = ans
mi.publisher = pub elif name in self.publisher_names:
date = val.rpartition('(')[-1].replace(')', '').strip() pub = val.partition(';')[0].partition('(')[0].strip()
try: if pub:
from calibre.utils.date import parse_only_date mi.publisher = pub
date = self.delocalize_datestr(date) date = val.rpartition('(')[-1].replace(')', '').strip()
mi.pubdate = parse_only_date(date, assume_utc=True) try:
except: from calibre.utils.date import parse_only_date
self.log.exception('Failed to parse pubdate: %s' % val) date = self.delocalize_datestr(date)
elif name in {'ISBN', 'ISBN-10', 'ISBN-13'}: mi.pubdate = parse_only_date(date, assume_utc=True)
ans = check_isbn(val) except:
if ans: self.log.exception('Failed to parse pubdate: %s' % val)
self.isbn = mi.isbn = ans elif name in {'ISBN', 'ISBN-10', 'ISBN-13'}:
ans = check_isbn(val)
if ans:
self.isbn = mi.isbn = ans
def parse_isbn(self, pd): def parse_isbn(self, pd):
items = pd.xpath( items = pd.xpath(
@ -925,7 +938,7 @@ class Worker(Thread): # Get details {{{
class Amazon(Source): class Amazon(Source):
name = 'Amazon.com' name = 'Amazon.com'
version = (1, 2, 16) version = (1, 2, 17)
minimum_calibre_version = (2, 82, 0) minimum_calibre_version = (2, 82, 0)
description = _('Downloads metadata and covers from Amazon') description = _('Downloads metadata and covers from Amazon')
@ -1568,7 +1581,7 @@ def manual_tests(domain, **kw): # {{{
( # Paperback with series ( # Paperback with series
{'identifiers': {'amazon': '1423146786'}}, {'identifiers': {'amazon': '1423146786'}},
[title_test('The Heroes of Olympus, Book Five The Blood of Olympus', [title_test('The Heroes of Olympus, Book Five The Blood of Olympus',
exact=True), series_test('Heroes of Olympus', 5)] exact=True), series_test('The Heroes of Olympus', 5)]
), ),
( # Kindle edition with series ( # Kindle edition with series
@ -1604,7 +1617,7 @@ def manual_tests(domain, **kw): # {{{
( # No specific problems ( # No specific problems
{'identifiers': {'isbn': '0743273567'}}, {'identifiers': {'isbn': '0743273567'}},
[title_test('The great gatsby', exact=True), [title_test('The great gatsby', exact=True),
authors_test(['F. Scott Fitzgerald'])] authors_test(['Francis Scott Fitzgerald'])]
), ),
] ]