Update Scientific American

This commit is contained in:
Kovid Goyal 2019-09-11 19:45:58 +05:30
parent 2b8ac505e2
commit 05d0f0895f
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -7,7 +7,7 @@ from css_selectors import Select
def absurl(url): def absurl(url):
if url.startswith('/'): if url.startswith('/'):
url = 'http://www.scientificamerican.com' + url url = 'https://www.scientificamerican.com' + url
return url return url
@@ -53,7 +53,7 @@ class ScientificAmerican(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
# Get the cover, date and issue URL # Get the cover, date and issue URL
root = self.index_to_soup( root = self.index_to_soup(
'http://www.scientificamerican.com/sciammag/', as_tree=True) 'https://www.scientificamerican.com/sciammag/', as_tree=True)
select = Select(root) select = Select(root)
url = [x.get('href', '') for x in select('main .store-listing__img a')][0] url = [x.get('href', '') for x in select('main .store-listing__img a')][0]
url = absurl(url) url = absurl(url)
@@ -74,12 +74,13 @@ class ScientificAmerican(BasicNewsRecipe):
def parse_sciam_features(self, select, section): def parse_sciam_features(self, select, section):
for article in select('article[data-article-title]', section): for article in select('article[data-article-title]', section):
title = article.get('data-article-title') title = article.get('data-article-title')
for a in select('a[href]', article): url = 'https://www.scientificamerican.com/{}/'.format(article.get('id').replace('-', '/', 1))
url = absurl(a.get('href'))
break
desc = '' desc = ''
for p in select('p.t_body', article): for p in select('p.t_body', article):
desc = self.tag_to_string(p) desc += self.tag_to_string(p)
break
for p in select('.t_meta', article):
desc += ' ' + self.tag_to_string(p)
break break
self.log('Found feature article: %s at %s' % (title, url)) self.log('Found feature article: %s at %s' % (title, url))
self.log('\t' + desc) self.log('\t' + desc)
@@ -94,11 +95,14 @@ class ScientificAmerican(BasicNewsRecipe):
section_title, articles = self.tag_to_string(span), [] section_title, articles = self.tag_to_string(span), []
self.log('\nFound section: %s' % section_title) self.log('\nFound section: %s' % section_title)
break break
for a in select('h2 a[href]', li): url = 'https://www.scientificamerican.com/{}/'.format(li.get('id').replace('-', '/', 1))
title = self.tag_to_string(a) for h2 in select('h2.t_listing-title', li):
url = absurl(a.get('href')) title = self.tag_to_string(h2)
articles.append( break
else:
continue
articles.append(
{'title': title, 'url': url, 'description': ''}) {'title': title, 'url': url, 'description': ''})
self.log('\tFound article: %s at %s' % (title, url)) self.log('\tFound article: %s at %s' % (title, url))
if articles: if articles:
yield section_title, articles yield section_title, articles