From 05d0f0895f36d8f43a778314e83a20d106e9120c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 11 Sep 2019 19:45:58 +0530 Subject: [PATCH] Update Scientific American --- recipes/scientific_american.recipe | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/recipes/scientific_american.recipe b/recipes/scientific_american.recipe index 7a92dd0d5a..9e5ec483da 100644 --- a/recipes/scientific_american.recipe +++ b/recipes/scientific_american.recipe @@ -7,7 +7,7 @@ from css_selectors import Select def absurl(url): if url.startswith('/'): - url = 'http://www.scientificamerican.com' + url + url = 'https://www.scientificamerican.com' + url return url @@ -53,7 +53,7 @@ class ScientificAmerican(BasicNewsRecipe): def parse_index(self): # Get the cover, date and issue URL root = self.index_to_soup( - 'http://www.scientificamerican.com/sciammag/', as_tree=True) + 'https://www.scientificamerican.com/sciammag/', as_tree=True) select = Select(root) url = [x.get('href', '') for x in select('main .store-listing__img a')][0] url = absurl(url) @@ -74,12 +74,13 @@ class ScientificAmerican(BasicNewsRecipe): def parse_sciam_features(self, select, section): for article in select('article[data-article-title]', section): title = article.get('data-article-title') - for a in select('a[href]', article): - url = absurl(a.get('href')) - break + url = 'https://www.scientificamerican.com/{}/'.format(article.get('id').replace('-', '/', 1)) desc = '' for p in select('p.t_body', article): - desc = self.tag_to_string(p) + desc += self.tag_to_string(p) + break + for p in select('.t_meta', article): + desc += ' ' + self.tag_to_string(p) break self.log('Found feature article: %s at %s' % (title, url)) self.log('\t' + desc) @@ -94,11 +95,14 @@ class ScientificAmerican(BasicNewsRecipe): section_title, articles = self.tag_to_string(span), [] self.log('\nFound section: %s' % section_title) break - for a in select('h2 a[href]', li): - title = self.tag_to_string(a) - url = absurl(a.get('href')) - articles.append( + url = 'https://www.scientificamerican.com/{}/'.format(li.get('id').replace('-', '/', 1)) + for h2 in select('h2.t_listing-title', li): + title = self.tag_to_string(h2) + break + else: + continue + articles.append( {'title': title, 'url': url, 'description': ''}) - self.log('\tFound article: %s at %s' % (title, url)) + self.log('\tFound article: %s at %s' % (title, url)) if articles: yield section_title, articles