Mirror of https://github.com/kovidgoyal/calibre.git (synced 2025-07-09 03:04:10 -04:00)
Update Amazon metadata download plugin for website changes
This commit is contained in:
Parent commit: 9686fb190d
This commit: a5f4b90e47
@ -682,6 +682,10 @@ class Worker(Thread): # Get details {{{
|
|||||||
for a in desc.xpath('descendant::a[@href]'):
|
for a in desc.xpath('descendant::a[@href]'):
|
||||||
del a.attrib['href']
|
del a.attrib['href']
|
||||||
a.tag = 'span'
|
a.tag = 'span'
|
||||||
|
for a in desc.xpath('descendant::span[@class="a-text-italic"]'):
|
||||||
|
a.tag = 'i'
|
||||||
|
for a in desc.xpath('descendant::span[@class="a-text-bold"]'):
|
||||||
|
a.tag = 'b'
|
||||||
desc = self.tostring(desc, method='html', encoding='unicode').strip()
|
desc = self.tostring(desc, method='html', encoding='unicode').strip()
|
||||||
desc = xml_replace_entities(desc, 'utf-8')
|
desc = xml_replace_entities(desc, 'utf-8')
|
||||||
|
|
||||||
@ -705,25 +709,36 @@ class Worker(Thread): # Get details {{{
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
from urllib import unquote
|
from urllib import unquote
|
||||||
ans = ''
|
ans = ''
|
||||||
ns = tuple(self.selector('#bookDescription_feature_div noscript'))
|
ovr = tuple(self.selector('#drengr_MobileTabbedDescriptionOverviewContent_feature_div'))
|
||||||
if ns:
|
if ovr:
|
||||||
ns = ns[0]
|
ovr = ovr[0]
|
||||||
if len(ns) == 0 and ns.text:
|
ovr.tag = 'div'
|
||||||
import html5lib
|
ans = self._render_comments(ovr)
|
||||||
# html5lib parsed noscript as CDATA
|
ovr = tuple(self.selector('#drengr_MobileTabbedDescriptionEditorialsContent_feature_div'))
|
||||||
ns = html5lib.parseFragment(
|
if ovr:
|
||||||
'<div>%s</div>' % (ns.text), treebuilder='lxml', namespaceHTMLElements=False)[0]
|
ovr = ovr[0]
|
||||||
else:
|
ovr.tag = 'div'
|
||||||
ns.tag = 'div'
|
ans += self._render_comments(ovr)
|
||||||
ans = self._render_comments(ns)
|
|
||||||
else:
|
else:
|
||||||
desc = root.xpath('//div[@id="ps-content"]/div[@class="content"]')
|
ns = tuple(self.selector('#bookDescription_feature_div noscript'))
|
||||||
if desc:
|
if ns:
|
||||||
ans = self._render_comments(desc[0])
|
ns = ns[0]
|
||||||
|
if len(ns) == 0 and ns.text:
|
||||||
|
import html5lib
|
||||||
|
# html5lib parsed noscript as CDATA
|
||||||
|
ns = html5lib.parseFragment(
|
||||||
|
'<div>%s</div>' % (ns.text), treebuilder='lxml', namespaceHTMLElements=False)[0]
|
||||||
|
else:
|
||||||
|
ns.tag = 'div'
|
||||||
|
ans = self._render_comments(ns)
|
||||||
else:
|
else:
|
||||||
ns = tuple(self.selector('#bookDescription_feature_div .a-expander-content'))
|
desc = root.xpath('//div[@id="ps-content"]/div[@class="content"]')
|
||||||
if ns:
|
if desc:
|
||||||
ans = self._render_comments(ns[0])
|
ans = self._render_comments(desc[0])
|
||||||
|
else:
|
||||||
|
ns = tuple(self.selector('#bookDescription_feature_div .a-expander-content'))
|
||||||
|
if ns:
|
||||||
|
ans = self._render_comments(ns[0])
|
||||||
|
|
||||||
desc = root.xpath(
|
desc = root.xpath(
|
||||||
'//div[@id="productDescription"]/*[@class="content"]')
|
'//div[@id="productDescription"]/*[@class="content"]')
|
||||||
@ -933,10 +948,11 @@ class Worker(Thread): # Get details {{{
|
|||||||
def parse_detail_cells(self, mi, c1, c2):
|
def parse_detail_cells(self, mi, c1, c2):
|
||||||
name = self.totext(c1, only_printable=True).strip().strip(':').strip()
|
name = self.totext(c1, only_printable=True).strip().strip(':').strip()
|
||||||
val = self.totext(c2).strip()
|
val = self.totext(c2).strip()
|
||||||
|
val = val.replace('\u200e', '').replace('\u200f', '')
|
||||||
if not val:
|
if not val:
|
||||||
return
|
return
|
||||||
if name in self.language_names:
|
if name in self.language_names:
|
||||||
ans = self.lang_map.get(val, None)
|
ans = self.lang_map.get(val)
|
||||||
if not ans:
|
if not ans:
|
||||||
ans = canonicalize_lang(val)
|
ans = canonicalize_lang(val)
|
||||||
if ans:
|
if ans:
|
||||||
@ -1015,7 +1031,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
class Amazon(Source):
|
class Amazon(Source):
|
||||||
|
|
||||||
name = 'Amazon.com'
|
name = 'Amazon.com'
|
||||||
version = (1, 3, 1)
|
version = (1, 3, 2)
|
||||||
minimum_calibre_version = (2, 82, 0)
|
minimum_calibre_version = (2, 82, 0)
|
||||||
description = _('Downloads metadata and covers from Amazon')
|
description = _('Downloads metadata and covers from Amazon')
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user