mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-31 14:33:54 -04:00
Amazon metadata download: Fixes for various changes to amazon website markup
This commit is contained in:
parent
dd33bd079a
commit
4526abb5e3
@ -497,6 +497,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
non_hero = tuple(self.selector(
|
non_hero = tuple(self.selector(
|
||||||
'div#bookDetails_container_div div#nonHeroSection')) or tuple(self.selector(
|
'div#bookDetails_container_div div#nonHeroSection')) or tuple(self.selector(
|
||||||
'#productDetails_techSpec_sections'))
|
'#productDetails_techSpec_sections'))
|
||||||
|
feature_and_detail_bullets = root.xpath('//*[@data-feature-name="featureBulletsAndDetailBullets"]')
|
||||||
if detail_bullets:
|
if detail_bullets:
|
||||||
self.parse_detail_bullets(root, mi, detail_bullets[0])
|
self.parse_detail_bullets(root, mi, detail_bullets[0])
|
||||||
elif non_hero:
|
elif non_hero:
|
||||||
@ -505,6 +506,8 @@ class Worker(Thread): # Get details {{{
|
|||||||
except:
|
except:
|
||||||
self.log.exception(
|
self.log.exception(
|
||||||
'Failed to parse new-style book details section')
|
'Failed to parse new-style book details section')
|
||||||
|
elif feature_and_detail_bullets:
|
||||||
|
self.parse_detail_bullets(root, mi, feature_and_detail_bullets[0], ul_selector='ul')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
pd = root.xpath(self.pd_xpath)
|
pd = root.xpath(self.pd_xpath)
|
||||||
@ -674,6 +677,16 @@ class Worker(Thread): # Get details {{{
|
|||||||
ans = parse_ratings_text(t)
|
ans = parse_ratings_text(t)
|
||||||
if ans is not None:
|
if ans is not None:
|
||||||
return ans
|
return ans
|
||||||
|
else:
|
||||||
|
# found in kindle book pages on amazon.com
|
||||||
|
for x in root.xpath('//a[@id="acrCustomerReviewLink"]'):
|
||||||
|
spans = x.xpath('./span')
|
||||||
|
if spans:
|
||||||
|
txt = self.tostring(spans[0], method='text', encoding='unicode', with_tail=False).strip()
|
||||||
|
try:
|
||||||
|
return float(txt.replace(',', '.'))
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
def _render_comments(self, desc):
|
def _render_comments(self, desc):
|
||||||
from calibre.library.comments import sanitize_comments_html
|
from calibre.library.comments import sanitize_comments_html
|
||||||
@ -784,6 +797,20 @@ class Worker(Thread): # Get details {{{
|
|||||||
def parse_series(self, root):
|
def parse_series(self, root):
|
||||||
ans = (None, None)
|
ans = (None, None)
|
||||||
|
|
||||||
|
# This is found on kindle pages for books on amazon.com
|
||||||
|
series = root.xpath('//*[@id="rpi-attribute-book_details-series"]')
|
||||||
|
if series:
|
||||||
|
spans = series[0].xpath('descendant::span')
|
||||||
|
if spans:
|
||||||
|
texts = [self.tostring(x, encoding='unicode', method='text', with_tail=False).strip() for x in spans]
|
||||||
|
texts = list(filter(None, texts))
|
||||||
|
if len(texts) == 2:
|
||||||
|
idxinfo, series = texts
|
||||||
|
m = re.search(r'[0-9.]+', idxinfo.strip())
|
||||||
|
if m is not None:
|
||||||
|
ans = series, float(m.group())
|
||||||
|
return ans
|
||||||
|
|
||||||
# This is found on the paperback/hardback pages for books on amazon.com
|
# This is found on the paperback/hardback pages for books on amazon.com
|
||||||
series = root.xpath('//div[@data-feature-name="seriesTitle"]')
|
series = root.xpath('//div[@data-feature-name="seriesTitle"]')
|
||||||
if series:
|
if series:
|
||||||
@ -952,8 +979,11 @@ class Worker(Thread): # Get details {{{
|
|||||||
if url:
|
if url:
|
||||||
return url
|
return url
|
||||||
|
|
||||||
def parse_detail_bullets(self, root, mi, container):
|
def parse_detail_bullets(self, root, mi, container, ul_selector='.detail-bullet-list'):
|
||||||
ul = next(self.selector('.detail-bullet-list', root=container))
|
try:
|
||||||
|
ul = next(self.selector(ul_selector, root=container))
|
||||||
|
except StopIteration:
|
||||||
|
return
|
||||||
for span in self.selector('.a-list-item', root=ul):
|
for span in self.selector('.a-list-item', root=ul):
|
||||||
cells = span.xpath('./span')
|
cells = span.xpath('./span')
|
||||||
if len(cells) >= 2:
|
if len(cells) >= 2:
|
||||||
@ -1052,7 +1082,7 @@ class Worker(Thread): # Get details {{{
|
|||||||
class Amazon(Source):
|
class Amazon(Source):
|
||||||
|
|
||||||
name = 'Amazon.com'
|
name = 'Amazon.com'
|
||||||
version = (1, 3, 6)
|
version = (1, 3, 7)
|
||||||
minimum_calibre_version = (2, 82, 0)
|
minimum_calibre_version = (2, 82, 0)
|
||||||
description = _('Downloads metadata and covers from Amazon')
|
description = _('Downloads metadata and covers from Amazon')
|
||||||
|
|
||||||
@ -1732,8 +1762,7 @@ def manual_tests(domain, **kw): # {{{
|
|||||||
all_tests['com'] = [ # {{{
|
all_tests['com'] = [ # {{{
|
||||||
( # Paperback with series
|
( # Paperback with series
|
||||||
{'identifiers': {'amazon': '1423146786'}},
|
{'identifiers': {'amazon': '1423146786'}},
|
||||||
[title_test('The Blood of Olympus',
|
[title_test('Heroes of Olympus', exact=False), series_test('The Heroes of Olympus', 5)]
|
||||||
exact=True), series_test('The Heroes of Olympus', 5)]
|
|
||||||
),
|
),
|
||||||
|
|
||||||
( # Kindle edition with series
|
( # Kindle edition with series
|
||||||
@ -1752,7 +1781,7 @@ def manual_tests(domain, **kw): # {{{
|
|||||||
( # Different comments markup, using Book Description section
|
( # Different comments markup, using Book Description section
|
||||||
{'identifiers': {'amazon': '0982514506'}},
|
{'identifiers': {'amazon': '0982514506'}},
|
||||||
[title_test(
|
[title_test(
|
||||||
"Griffin's Destiny: Book Three: The Griffin's Daughter Trilogy",
|
"Griffin's Destiny",
|
||||||
exact=True),
|
exact=True),
|
||||||
comments_test('Jelena'), comments_test('Ashinji'),
|
comments_test('Jelena'), comments_test('Ashinji'),
|
||||||
]
|
]
|
||||||
@ -1768,8 +1797,8 @@ def manual_tests(domain, **kw): # {{{
|
|||||||
|
|
||||||
( # No specific problems
|
( # No specific problems
|
||||||
{'identifiers': {'isbn': '0743273567'}},
|
{'identifiers': {'isbn': '0743273567'}},
|
||||||
[title_test('the great gatsby: the only authorized edition', exact=True),
|
[title_test('the great gatsby'),
|
||||||
authors_test(['Francis Scott Fitzgerald'])]
|
authors_test(['f. Scott Fitzgerald'])]
|
||||||
),
|
),
|
||||||
|
|
||||||
]
|
]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user