#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe


class Habr(BasicNewsRecipe):
    """Calibre news recipe that pulls the English-language Habr RSS feeds."""

    # --- Publication metadata -------------------------------------------
    title = 'Habr'
    __author__ = 'bugmen00t'
    description = 'Russian collaborative blog about IT, computer science and anything related to the Internet'
    publisher = 'Habr Blockchain Publishing LTD'
    category = 'blog'
    cover_url = u'https://hsto.org/webt/f1/lq/ka/f1lqkaveikdfqkb_rip_4vq4s_8.png'
    language = 'en_RU'

    # --- Download / clean-up behaviour ----------------------------------
    no_stylesheets = True
    remove_javascript = False
    auto_cleanup = False  # clean-up is done by the explicit tag rules below
    oldest_article = 30
    max_articles_per_feed = 30

    # --- Article trimming rules -----------------------------------------
    # Content is bounded by the first <h1> and the "tm-misprint-area" div.
    remove_tags_before = dict(name='h1')
    remove_tags_after = dict(name='div', attrs={'class': 'tm-misprint-area'})
    # Blocks dropped from inside the retained article body.
    remove_tags = [
        dict(name='div', attrs={'class': 'tm-article-presenter__meta'}),
        dict(name='div', attrs={'class': 'tm-article-poll'}),
    ]

    # --- Feed list: (section title, RSS URL) ----------------------------
    feeds = [
        ('News', 'https://habr.com/en/rss/news/?fl=en'),
        ('All materials', 'https://habr.com/en/rss/all?fl=en'),
    ]

    def preprocess_html(self, soup):
        """Copy each image's ``data-src`` value into its ``src`` attribute.

        Only ``<img>`` tags that carry a ``data-src`` attribute are touched
        (presumably lazily loaded images -- confirm against the site markup).
        The same ``soup`` object is modified in place and returned.
        """
        lazy_images = soup.findAll('img', attrs={'data-src': True})
        for image in lazy_images:
            image['src'] = image['data-src']
        return soup