Amazon metadata download: Add support for yet another variant of series markup

This commit is contained in:
Kovid Goyal 2021-11-10 13:27:00 +05:30
parent 45b17f6cad
commit 9df1350784
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -728,21 +728,14 @@ class Worker(Thread): # Get details {{{
if series: if series:
ans = (series, series_index) ans = (series, series_index)
else: else:
series = root.xpath('//div[@id="seriesBullet_feature_div"]') series = root.xpath('//div[@id="seriesBulletWidget_feature_div"]')
if series: if series:
series = series[0] a = series[0].xpath('descendant::a')
spans = series.xpath('descendant::span') if a:
if spans: raw = self.tostring(a[0], encoding='unicode', method='text', with_tail=False)
span = spans[0] m = re.search(r'(?:Book|Libro)\s+(?P<index>[0-9.]+)\s+(?:of|de)\s+([0-9.]+)\s*:\s*(?P<series>.+)', raw.strip())
b = span.xpath('./b') if m is not None:
a = span.xpath('./a') ans = (m.group('series').strip(), float(m.group('index')))
if a and b:
series = self.tostring(a[0], encoding='unicode', method='text', with_tail=False).strip()
if series:
raw = self.tostring(b[0], encoding='unicode', method='text', with_tail=False).strip()
m = re.search(r'[0-9.]+', raw)
if m is not None:
ans = (series, float(m.group()))
# This is found on Kindle edition pages on amazon.com # This is found on Kindle edition pages on amazon.com
if ans == (None, None): if ans == (None, None):
@@ -975,7 +968,7 @@ class Worker(Thread): # Get details {{{
class Amazon(Source): class Amazon(Source):
name = 'Amazon.com' name = 'Amazon.com'
version = (1, 2, 20) version = (1, 2, 21)
minimum_calibre_version = (2, 82, 0) minimum_calibre_version = (2, 82, 0)
description = _('Downloads metadata and covers from Amazon') description = _('Downloads metadata and covers from Amazon')
@@ -1654,7 +1647,7 @@ def manual_tests(domain, **kw): # {{{
( # No specific problems ( # No specific problems
{'identifiers': {'isbn': '0743273567'}}, {'identifiers': {'isbn': '0743273567'}},
[title_test('The great gatsby', exact=True), [title_test('the great gatsby: the only authorized edition', exact=True),
authors_test(['Francis Scott Fitzgerald'])] authors_test(['Francis Scott Fitzgerald'])]
), ),