mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
Amazon metadata download: Fixes for various changes to Amazon website markup
This commit is contained in:
parent
dd33bd079a
commit
4526abb5e3
@ -497,6 +497,7 @@ class Worker(Thread): # Get details {{{
|
||||
non_hero = tuple(self.selector(
|
||||
'div#bookDetails_container_div div#nonHeroSection')) or tuple(self.selector(
|
||||
'#productDetails_techSpec_sections'))
|
||||
feature_and_detail_bullets = root.xpath('//*[@data-feature-name="featureBulletsAndDetailBullets"]')
|
||||
if detail_bullets:
|
||||
self.parse_detail_bullets(root, mi, detail_bullets[0])
|
||||
elif non_hero:
|
||||
@ -505,6 +506,8 @@ class Worker(Thread): # Get details {{{
|
||||
except:
|
||||
self.log.exception(
|
||||
'Failed to parse new-style book details section')
|
||||
elif feature_and_detail_bullets:
|
||||
self.parse_detail_bullets(root, mi, feature_and_detail_bullets[0], ul_selector='ul')
|
||||
|
||||
else:
|
||||
pd = root.xpath(self.pd_xpath)
|
||||
@ -674,6 +677,16 @@ class Worker(Thread): # Get details {{{
|
||||
ans = parse_ratings_text(t)
|
||||
if ans is not None:
|
||||
return ans
|
||||
else:
|
||||
# found in kindle book pages on amazon.com
|
||||
for x in root.xpath('//a[@id="acrCustomerReviewLink"]'):
|
||||
spans = x.xpath('./span')
|
||||
if spans:
|
||||
txt = self.tostring(spans[0], method='text', encoding='unicode', with_tail=False).strip()
|
||||
try:
|
||||
return float(txt.replace(',', '.'))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
def _render_comments(self, desc):
|
||||
from calibre.library.comments import sanitize_comments_html
|
||||
@ -784,6 +797,20 @@ class Worker(Thread): # Get details {{{
|
||||
def parse_series(self, root):
|
||||
ans = (None, None)
|
||||
|
||||
# This is found on kindle pages for books on amazon.com
|
||||
series = root.xpath('//*[@id="rpi-attribute-book_details-series"]')
|
||||
if series:
|
||||
spans = series[0].xpath('descendant::span')
|
||||
if spans:
|
||||
texts = [self.tostring(x, encoding='unicode', method='text', with_tail=False).strip() for x in spans]
|
||||
texts = list(filter(None, texts))
|
||||
if len(texts) == 2:
|
||||
idxinfo, series = texts
|
||||
m = re.search(r'[0-9.]+', idxinfo.strip())
|
||||
if m is not None:
|
||||
ans = series, float(m.group())
|
||||
return ans
|
||||
|
||||
# This is found on the paperback/hardback pages for books on amazon.com
|
||||
series = root.xpath('//div[@data-feature-name="seriesTitle"]')
|
||||
if series:
|
||||
@ -952,8 +979,11 @@ class Worker(Thread): # Get details {{{
|
||||
if url:
|
||||
return url
|
||||
|
||||
def parse_detail_bullets(self, root, mi, container):
|
||||
ul = next(self.selector('.detail-bullet-list', root=container))
|
||||
def parse_detail_bullets(self, root, mi, container, ul_selector='.detail-bullet-list'):
|
||||
try:
|
||||
ul = next(self.selector(ul_selector, root=container))
|
||||
except StopIteration:
|
||||
return
|
||||
for span in self.selector('.a-list-item', root=ul):
|
||||
cells = span.xpath('./span')
|
||||
if len(cells) >= 2:
|
||||
@ -1052,7 +1082,7 @@ class Worker(Thread): # Get details {{{
|
||||
class Amazon(Source):
|
||||
|
||||
name = 'Amazon.com'
|
||||
version = (1, 3, 6)
|
||||
version = (1, 3, 7)
|
||||
minimum_calibre_version = (2, 82, 0)
|
||||
description = _('Downloads metadata and covers from Amazon')
|
||||
|
||||
@ -1732,8 +1762,7 @@ def manual_tests(domain, **kw): # {{{
|
||||
all_tests['com'] = [ # {{{
|
||||
( # Paperback with series
|
||||
{'identifiers': {'amazon': '1423146786'}},
|
||||
[title_test('The Blood of Olympus',
|
||||
exact=True), series_test('The Heroes of Olympus', 5)]
|
||||
[title_test('Heroes of Olympus', exact=False), series_test('The Heroes of Olympus', 5)]
|
||||
),
|
||||
|
||||
( # Kindle edition with series
|
||||
@ -1752,7 +1781,7 @@ def manual_tests(domain, **kw): # {{{
|
||||
( # Different comments markup, using Book Description section
|
||||
{'identifiers': {'amazon': '0982514506'}},
|
||||
[title_test(
|
||||
"Griffin's Destiny: Book Three: The Griffin's Daughter Trilogy",
|
||||
"Griffin's Destiny",
|
||||
exact=True),
|
||||
comments_test('Jelena'), comments_test('Ashinji'),
|
||||
]
|
||||
@ -1768,8 +1797,8 @@ def manual_tests(domain, **kw): # {{{
|
||||
|
||||
( # No specific problems
|
||||
{'identifiers': {'isbn': '0743273567'}},
|
||||
[title_test('the great gatsby: the only authorized edition', exact=True),
|
||||
authors_test(['Francis Scott Fitzgerald'])]
|
||||
[title_test('the great gatsby'),
|
||||
authors_test(['f. Scott Fitzgerald'])]
|
||||
),
|
||||
|
||||
]
|
||||
|
Loading…
x
Reference in New Issue
Block a user