calibre/recipes/popscience.recipe
2021-06-02 09:24:25 +05:30

61 lines
2.0 KiB
Python

#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2020, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build ``find``/``findAll`` keyword arguments matching CSS class names.

    ``classes`` is a space-separated string of class names. The returned
    dict has a single ``attrs`` key whose ``class`` entry is a predicate:
    given a tag's class attribute value it returns the (possibly empty)
    frozenset of names shared with ``classes``, or the value itself when
    that value is falsy (e.g. ``None`` or an empty string).
    """
    wanted = frozenset(classes.split(' '))

    def shared_names(value):
        # A missing or empty class attribute is handed back unchanged.
        if not value:
            return value
        return frozenset(value.split()) & wanted

    return {'attrs': {'class': shared_names}}
class AdvancedUserRecipe1282101454(BasicNewsRecipe):
    """Recipe that collects Popular Science articles from popsci.com section pages."""

    # Recipe metadata and options consumed by the BasicNewsRecipe base class.
    title = 'Popular Science'
    language = 'en'
    __author__ = 'Kovid Goyal'
    description = 'Popular Science'
    publisher = 'Popular Science'
    max_articles_per_feed = 100
    ignore_duplicate_articles = {'url'}
    no_stylesheets = True
    # Tags carrying any of these CSS classes are selected by the matcher.
    keep_only_tags = [
        classes('content-body article-header featured-img'),
    ]

    def parse_section_index(self, slug):
        """Yield an article dict for each item listed on a section page.

        ``slug`` is the path component of the section on www.popsci.com.
        Every yielded dict has the keys ``title``, ``url`` and
        ``description``; the description is an empty string when the item
        has no ``dek`` element.

        A page without a ``main-module`` container yields nothing, and items
        without a ``linkable`` anchor are skipped, so a markup change in one
        place does not abort the whole download.
        """
        soup = self.index_to_soup('https://www.popsci.com/{}/'.format(slug))
        main = soup.find(**classes('main-module'))
        if main is None:
            self.log.warn('No main-module container found for section:', slug)
            return
        for div in main.findAll(**classes('main-item')):
            a = div.find('a', href=True, **classes('linkable'))
            if a is None:
                # Nothing to link to; skip rather than fail on a['href'].
                continue
            url = a['href']
            title = self.tag_to_string(a.find(**classes('title')))
            desc = ''
            dek = a.find(**classes('dek'))
            if dek is not None:
                desc = self.tag_to_string(dek)
            self.log(' ', title, url)
            yield {'title': title, 'url': url, 'description': desc}

    def parse_index(self):
        """Return a list of ``(section title, articles)`` tuples.

        Sections are visited in the fixed order listed below; a section for
        which no articles were found is left out of the result.
        """
        sections = []
        # A tuple of pairs (rather than a dict) keeps the section order
        # deterministic on every Python version.
        for slug, title in (
            ('science', 'Science'),
            ('technology', 'Technology'),
            ('diy', 'DIY'),
            ('reviews', 'Reviews'),
        ):
            self.log('Section:', title)
            articles = list(self.parse_section_index(slug))
            if articles:
                sections.append((title, articles))
        return sections

    def preprocess_html(self, soup):
        """Copy each image's ``data-medsrc`` attribute into ``src`` and return ``soup``."""
        # NOTE(review): presumably the page defers the real image URL to
        # data-medsrc (lazy loading) -- confirm against the live markup.
        for img in soup.findAll('img', attrs={'data-medsrc': True}):
            img['src'] = img['data-medsrc']
        return soup