Amazon metadata download: Add support for yet another variant of series markup

This commit is contained in:
Kovid Goyal 2021-11-10 13:27:00 +05:30
parent 45b17f6cad
commit 9df1350784
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -728,21 +728,14 @@ class Worker(Thread): # Get details {{{
if series:
ans = (series, series_index)
else:
series = root.xpath('//div[@id="seriesBullet_feature_div"]')
series = root.xpath('//div[@id="seriesBulletWidget_feature_div"]')
if series:
series = series[0]
spans = series.xpath('descendant::span')
if spans:
span = spans[0]
b = span.xpath('./b')
a = span.xpath('./a')
if a and b:
series = self.tostring(a[0], encoding='unicode', method='text', with_tail=False).strip()
if series:
raw = self.tostring(b[0], encoding='unicode', method='text', with_tail=False).strip()
m = re.search(r'[0-9.]+', raw)
if m is not None:
ans = (series, float(m.group()))
a = series[0].xpath('descendant::a')
if a:
raw = self.tostring(a[0], encoding='unicode', method='text', with_tail=False)
m = re.search(r'(?:Book|Libro)\s+(?P<index>[0-9.]+)\s+(?:of|de)\s+([0-9.]+)\s*:\s*(?P<series>.+)', raw.strip())
if m is not None:
ans = (m.group('series').strip(), float(m.group('index')))
# This is found on Kindle edition pages on amazon.com
if ans == (None, None):
@@ -975,7 +968,7 @@ class Worker(Thread): # Get details {{{
class Amazon(Source):
name = 'Amazon.com'
version = (1, 2, 20)
version = (1, 2, 21)
minimum_calibre_version = (2, 82, 0)
description = _('Downloads metadata and covers from Amazon')
@@ -1654,7 +1647,7 @@ def manual_tests(domain, **kw): # {{{
( # No specific problems
{'identifiers': {'isbn': '0743273567'}},
[title_test('The great gatsby', exact=True),
[title_test('the great gatsby: the only authorized edition', exact=True),
authors_test(['Francis Scott Fitzgerald'])]
),