Another markup format for series info on amazon.com pages

This commit is contained in:
Kovid Goyal 2021-04-02 10:20:29 +05:30
parent eb56bc531b
commit 4b0745052d
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -691,6 +691,23 @@ class Worker(Thread): # Get details {{{
s[0], encoding='unicode', method='text', with_tail=False).strip()
if series:
ans = (series, series_index)
else:
series = root.xpath('//div[@id="seriesBullet_feature_div"]')
if series:
series = series[0]
spans = series.xpath('descendant::span')
if spans:
span = spans[0]
b = span.xpath('./b')
a = span.xpath('./a')
if a and b:
series = self.tostring(a[0], encoding='unicode', method='text', with_tail=False).strip()
if series:
raw = self.tostring(b[0], encoding='unicode', method='text', with_tail=False).strip()
m = re.search(r'[0-9.]+', raw)
if m is not None:
ans = (series, float(m.group()))
# This is found on Kindle edition pages on amazon.com
if ans == (None, None):
for span in root.xpath('//div[@id="aboutEbooksSection"]//li/span'):