From 9df1350784327daf8a7a8a035affcab0b60c13a9 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 10 Nov 2021 13:27:00 +0530 Subject: [PATCH] Amazon metadata download: Add support for yet another variant of series markup --- src/calibre/ebooks/metadata/sources/amazon.py | 25 +++++++------------ 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/src/calibre/ebooks/metadata/sources/amazon.py b/src/calibre/ebooks/metadata/sources/amazon.py index 35565ec5ea..52a9b44bbc 100644 --- a/src/calibre/ebooks/metadata/sources/amazon.py +++ b/src/calibre/ebooks/metadata/sources/amazon.py @@ -728,21 +728,14 @@ class Worker(Thread): # Get details {{{ if series: ans = (series, series_index) else: - series = root.xpath('//div[@id="seriesBullet_feature_div"]') + series = root.xpath('//div[@id="seriesBulletWidget_feature_div"]') if series: - series = series[0] - spans = series.xpath('descendant::span') - if spans: - span = spans[0] - b = span.xpath('./b') - a = span.xpath('./a') - if a and b: - series = self.tostring(a[0], encoding='unicode', method='text', with_tail=False).strip() - if series: - raw = self.tostring(b[0], encoding='unicode', method='text', with_tail=False).strip() - m = re.search(r'[0-9.]+', raw) - if m is not None: - ans = (series, float(m.group())) + a = series[0].xpath('descendant::a') + if a: + raw = self.tostring(a[0], encoding='unicode', method='text', with_tail=False) + m = re.search(r'(?:Book|Libro)\s+(?P<index>[0-9.]+)\s+(?:of|de)\s+([0-9.]+)\s*:\s*(?P<series>.+)', raw.strip()) + if m is not None: + ans = (m.group('series').strip(), float(m.group('index'))) # This is found on Kindle edition pages on amazon.com if ans == (None, None): @@ -975,7 +968,7 @@ class Worker(Thread): # Get details {{{ class Amazon(Source): name = 'Amazon.com' - version = (1, 2, 20) + version = (1, 2, 21) minimum_calibre_version = (2, 82, 0) description = _('Downloads metadata and covers from Amazon') @@ -1654,7 +1647,7 @@ def manual_tests(domain, **kw): # {{{ ( # No specific problems 
{'identifiers': {'isbn': '0743273567'}}, - [title_test('The great gatsby', exact=True), + [title_test('the great gatsby: the only authorized edition', exact=True), authors_test(['Francis Scott Fitzgerald'])] ),