mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Update Scientific American
This commit is contained in:
parent
2b8ac505e2
commit
05d0f0895f
@@ -7,7 +7,7 @@ from css_selectors import Select
|
|||||||
|
|
||||||
def absurl(url):
|
def absurl(url):
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'http://www.scientificamerican.com' + url
|
url = 'https://www.scientificamerican.com' + url
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
|
||||||
@@ -53,7 +53,7 @@ class ScientificAmerican(BasicNewsRecipe):
|
|||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
# Get the cover, date and issue URL
|
# Get the cover, date and issue URL
|
||||||
root = self.index_to_soup(
|
root = self.index_to_soup(
|
||||||
'http://www.scientificamerican.com/sciammag/', as_tree=True)
|
'https://www.scientificamerican.com/sciammag/', as_tree=True)
|
||||||
select = Select(root)
|
select = Select(root)
|
||||||
url = [x.get('href', '') for x in select('main .store-listing__img a')][0]
|
url = [x.get('href', '') for x in select('main .store-listing__img a')][0]
|
||||||
url = absurl(url)
|
url = absurl(url)
|
||||||
@@ -74,12 +74,13 @@ class ScientificAmerican(BasicNewsRecipe):
|
|||||||
def parse_sciam_features(self, select, section):
|
def parse_sciam_features(self, select, section):
|
||||||
for article in select('article[data-article-title]', section):
|
for article in select('article[data-article-title]', section):
|
||||||
title = article.get('data-article-title')
|
title = article.get('data-article-title')
|
||||||
for a in select('a[href]', article):
|
url = 'https://www.scientificamerican.com/{}/'.format(article.get('id').replace('-', '/', 1))
|
||||||
url = absurl(a.get('href'))
|
|
||||||
break
|
|
||||||
desc = ''
|
desc = ''
|
||||||
for p in select('p.t_body', article):
|
for p in select('p.t_body', article):
|
||||||
desc = self.tag_to_string(p)
|
desc += self.tag_to_string(p)
|
||||||
|
break
|
||||||
|
for p in select('.t_meta', article):
|
||||||
|
desc += ' ' + self.tag_to_string(p)
|
||||||
break
|
break
|
||||||
self.log('Found feature article: %s at %s' % (title, url))
|
self.log('Found feature article: %s at %s' % (title, url))
|
||||||
self.log('\t' + desc)
|
self.log('\t' + desc)
|
||||||
@@ -94,11 +95,14 @@ class ScientificAmerican(BasicNewsRecipe):
|
|||||||
section_title, articles = self.tag_to_string(span), []
|
section_title, articles = self.tag_to_string(span), []
|
||||||
self.log('\nFound section: %s' % section_title)
|
self.log('\nFound section: %s' % section_title)
|
||||||
break
|
break
|
||||||
for a in select('h2 a[href]', li):
|
url = 'https://www.scientificamerican.com/{}/'.format(li.get('id').replace('-', '/', 1))
|
||||||
title = self.tag_to_string(a)
|
for h2 in select('h2.t_listing-title', li):
|
||||||
url = absurl(a.get('href'))
|
title = self.tag_to_string(h2)
|
||||||
articles.append(
|
break
|
||||||
|
else:
|
||||||
|
continue
|
||||||
|
articles.append(
|
||||||
{'title': title, 'url': url, 'description': ''})
|
{'title': title, 'url': url, 'description': ''})
|
||||||
self.log('\tFound article: %s at %s' % (title, url))
|
self.log('\tFound article: %s at %s' % (title, url))
|
||||||
if articles:
|
if articles:
|
||||||
yield section_title, articles
|
yield section_title, articles
|
||||||
|
Loading…
x
Reference in New Issue
Block a user