Update Popular Science

This commit is contained in:
Kovid Goyal 2024-12-06 19:30:39 +05:30
parent 6e13089def
commit 485e1a2d20
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -23,20 +23,26 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
ignore_duplicate_articles = {'url'} ignore_duplicate_articles = {'url'}
no_stylesheets = True no_stylesheets = True
keep_only_tags = [ keep_only_tags = [
classes('Article-header Article-excerpt Article-author Article-thumbnail Article-bodyText'), classes('Article-header Article-excerpt Article-author Article-thumbnail Article-bodyText article-title article-dek article-paragraph articlebody'),
]
remove_tags = [
dict(name='section', attrs={'class': ['recurrent-share']})
] ]
def parse_section_index(self, slug): def parse_section_index(self, slug):
url = 'https://www.popsci.com/{}/'.format(slug) url = 'https://www.popsci.com/{}/'.format(slug)
self.log('Section:', url) self.log('Section:', url)
soup = self.index_to_soup(url) soup = self.index_to_soup(url)
main = soup.find(**classes('PostsContainer')) main = soup.find(**classes('category-content-wrapper lg:pb-12'))
for div in main.findAll(**classes('PostItem')): if main is None:
a = div.find('a', href=True, **classes('PostItem-link')) return
for div in main.findAll(**classes('card-post')):
a = div.find('a', href=True, **classes('card-post-image-link'))
url = a['href'] url = a['href']
title = self.tag_to_string(div.find(**classes('PostItem-title'))) tdiv = div.find(**classes('card-post-title')).find(**classes('mobile'))
title = self.tag_to_string(tdiv)
desc = '' desc = ''
dek = div.find(**classes('PostItem-excerpt')) dek = div.find(**classes('card-post-excerpt'))
if dek is not None: if dek is not None:
desc = self.tag_to_string(dek) desc = self.tag_to_string(dek)
self.log(' ', title, url) self.log(' ', title, url)
@ -50,6 +56,7 @@ class AdvancedUserRecipe1282101454(BasicNewsRecipe):
'diy': 'DIY', 'diy': 'DIY',
'reviews': 'Reviews', 'reviews': 'Reviews',
}.items(): }.items():
self.log('slug:', slug)
articles = list(self.parse_section_index(slug)) articles = list(self.parse_section_index(slug))
if articles: if articles:
sections.append((title, articles)) sections.append((title, articles))