From fc1a064525115aaa4adc5bf1ae817ebb68c1128a Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 2 Jun 2022 13:00:37 +0530 Subject: [PATCH] Update Scientific American --- recipes/scientific_american.recipe | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/recipes/scientific_american.recipe b/recipes/scientific_american.recipe index 1acad5a74e..de688830ab 100644 --- a/recipes/scientific_american.recipe +++ b/recipes/scientific_american.recipe @@ -1,7 +1,7 @@ #!/usr/bin/env python __license__ = 'GPL v3' -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe, classes from css_selectors import Select @@ -11,11 +11,6 @@ def absurl(url): return url -keep_classes = {'article-header', 'article-content', - 'article-media', 'article-author', 'article-text'} -remove_classes = {'aside-banner', 'moreToExplore', 'article-footer'} - - class ScientificAmerican(BasicNewsRecipe): title = u'Scientific American' description = u'Popular Science. Monthly magazine. Should be downloaded around the middle of each month.' @@ -31,12 +26,12 @@ class ScientificAmerican(BasicNewsRecipe): needs_subscription = 'optional' keep_only_tags = [ - dict(attrs={'class': lambda x: x and bool( - set(x.split()).intersection(keep_classes))}), + classes( + 'article-header article-content article-media article-author article-text feature-article--header' + ' feature-article--header-title opinion-article__header-title author-bio'), ] remove_tags = [ - dict(attrs={'class': lambda x: x and bool( - set(x.split()).intersection(remove_classes))}), + classes('aside-banner moreToExplore article-footer flex-column--25 article-author__suggested'), dict(id=['seeAlsoLinks']), ] @@ -55,12 +50,14 @@ class ScientificAmerican(BasicNewsRecipe): root = self.index_to_soup( 'https://www.scientificamerican.com/sciammag/', as_tree=True) select = Select(root) + self.cover_url = [x.get('src', '') for x in select('main .store-listing__img img')][0] url = [x.get('href', '') for x in select('main .store-listing__img a')][0] url = absurl(url) - self.cover_url = [x.get('src', '') for x in select('main .store-listing__img img')][0] # Now parse the actual issue to get the list of articles select = Select(self.index_to_soup(url, as_tree=True)) + self.cover_url = [x.get('src', '') for x in select('main .product-detail__image img')][0].split('?')[0] + self.cover_url += '?w=800' feeds = [] for i, section in enumerate(select('#sa_body .toc-articles')): if i == 0: