Update Amazon metadata download plugin for website changes

This commit is contained in:
Kovid Goyal 2022-11-03 22:23:53 +05:30
parent 9686fb190d
commit a5f4b90e47
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -682,6 +682,10 @@ class Worker(Thread): # Get details {{{
for a in desc.xpath('descendant::a[@href]'): for a in desc.xpath('descendant::a[@href]'):
del a.attrib['href'] del a.attrib['href']
a.tag = 'span' a.tag = 'span'
for a in desc.xpath('descendant::span[@class="a-text-italic"]'):
a.tag = 'i'
for a in desc.xpath('descendant::span[@class="a-text-bold"]'):
a.tag = 'b'
desc = self.tostring(desc, method='html', encoding='unicode').strip() desc = self.tostring(desc, method='html', encoding='unicode').strip()
desc = xml_replace_entities(desc, 'utf-8') desc = xml_replace_entities(desc, 'utf-8')
@@ -705,25 +709,36 @@ class Worker(Thread): # Get details {{{
except ImportError: except ImportError:
from urllib import unquote from urllib import unquote
ans = '' ans = ''
ns = tuple(self.selector('#bookDescription_feature_div noscript')) ovr = tuple(self.selector('#drengr_MobileTabbedDescriptionOverviewContent_feature_div'))
if ns: if ovr:
ns = ns[0] ovr = ovr[0]
if len(ns) == 0 and ns.text: ovr.tag = 'div'
import html5lib ans = self._render_comments(ovr)
# html5lib parsed noscript as CDATA ovr = tuple(self.selector('#drengr_MobileTabbedDescriptionEditorialsContent_feature_div'))
ns = html5lib.parseFragment( if ovr:
'<div>%s</div>' % (ns.text), treebuilder='lxml', namespaceHTMLElements=False)[0] ovr = ovr[0]
else: ovr.tag = 'div'
ns.tag = 'div' ans += self._render_comments(ovr)
ans = self._render_comments(ns)
else: else:
desc = root.xpath('//div[@id="ps-content"]/div[@class="content"]') ns = tuple(self.selector('#bookDescription_feature_div noscript'))
if desc: if ns:
ans = self._render_comments(desc[0]) ns = ns[0]
if len(ns) == 0 and ns.text:
import html5lib
# html5lib parsed noscript as CDATA
ns = html5lib.parseFragment(
'<div>%s</div>' % (ns.text), treebuilder='lxml', namespaceHTMLElements=False)[0]
else:
ns.tag = 'div'
ans = self._render_comments(ns)
else: else:
ns = tuple(self.selector('#bookDescription_feature_div .a-expander-content')) desc = root.xpath('//div[@id="ps-content"]/div[@class="content"]')
if ns: if desc:
ans = self._render_comments(ns[0]) ans = self._render_comments(desc[0])
else:
ns = tuple(self.selector('#bookDescription_feature_div .a-expander-content'))
if ns:
ans = self._render_comments(ns[0])
desc = root.xpath( desc = root.xpath(
'//div[@id="productDescription"]/*[@class="content"]') '//div[@id="productDescription"]/*[@class="content"]')
@@ -933,10 +948,11 @@ class Worker(Thread): # Get details {{{
def parse_detail_cells(self, mi, c1, c2): def parse_detail_cells(self, mi, c1, c2):
name = self.totext(c1, only_printable=True).strip().strip(':').strip() name = self.totext(c1, only_printable=True).strip().strip(':').strip()
val = self.totext(c2).strip() val = self.totext(c2).strip()
val = val.replace('\u200e', '').replace('\u200f', '')
if not val: if not val:
return return
if name in self.language_names: if name in self.language_names:
ans = self.lang_map.get(val, None) ans = self.lang_map.get(val)
if not ans: if not ans:
ans = canonicalize_lang(val) ans = canonicalize_lang(val)
if ans: if ans:
@@ -1015,7 +1031,7 @@ class Worker(Thread): # Get details {{{
class Amazon(Source): class Amazon(Source):
name = 'Amazon.com' name = 'Amazon.com'
version = (1, 3, 1) version = (1, 3, 2)
minimum_calibre_version = (2, 82, 0) minimum_calibre_version = (2, 82, 0)
description = _('Downloads metadata and covers from Amazon') description = _('Downloads metadata and covers from Amazon')