#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe


def classes(classes):
    """Build a tag-matching spec keyed on CSS class names.

    *classes* is a string of class names separated by single spaces. The
    returned dict has one key, ``attrs``, mapping ``class`` to a predicate.
    The predicate is truthy when the class attribute value it receives is
    non-empty and shares at least one whitespace-separated name with
    *classes*.
    """
    wanted = frozenset(classes.split(' '))

    def shares_wanted_class(value):
        # A missing/empty class attribute is returned as-is (falsy);
        # otherwise the (possibly empty) set of common names is returned.
        return value and frozenset(value.split()).intersection(wanted)

    return dict(attrs={'class': shares_wanted_class})


# Recipe for The Federalist. The class attributes below are settings read by
# BasicNewsRecipe; see calibre's recipe API documentation for their meaning.
class Federalist(BasicNewsRecipe):
    title = 'The Federalist'
    __author__ = 'Kovid Goyal'
    language = 'en'
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_attributes = ['xmlns', 'lang', 'style', 'width', 'height']
    extra_css = ''' .shortbio { margin: 1em; padding: 1em; font-style: italic } '''

    # Tag specs, selected by CSS class, for the content to keep.
    keep_only_tags = [
        classes('entry-header'),
        classes('wp-post-image post-categories entry-content shortbio byline-month byline-standard alpha-byline'),
    ]

    # Tag specs for elements to strip: by tag name, then by CSS class.
    remove_tags = [
        dict(name=['meta', 'link']),
        classes('auth-ad attachment-post-thumbnail attachment-author-bio'),
    ]

    # (title, URL) pairs of the feeds to fetch.
    feeds = [
        ('All', 'http://thefederalist.com/feed/'),
    ]

    def preprocess_html(self, soup):
        """Resolve lazy-loaded image sources and drop repeated images.

        Every ``img`` carrying a ``data-lazy-src`` attribute gets that value
        copied into ``src``. Afterwards, any ``img`` whose ``src`` was already
        seen earlier in the document is extracted from the tree. The same
        *soup* object is returned.
        """
        for lazy_img in soup.findAll('img', attrs={'data-lazy-src': True}):
            lazy_img['src'] = lazy_img['data-lazy-src']

        known_sources = set()
        for image in soup.findAll('img', src=True):
            source = image['src']
            if source in known_sources:
                # Repeat of an earlier image: remove this occurrence.
                image.extract()
            else:
                known_sources.add(source)
        return soup

    # Commented-out parse_index override returning two fixed article URLs
    # (presumably kept for manual testing of the recipe -- confirm before
    # removing).
    # def parse_index(self):
    #     return [('Articles', [
    #         {'title':'img', 'url':'http://thefederalist.com/2018/05/09/venezuelas-economic-problems-caused-socialism-not-falling-oil-prices/'},
    #         {'title':'xxx', 'url':'http://thefederalist.com/2018/05/04/fans-take-on-marvel-dc-and-the-comic-book-industrys-sjw-self-destruction/'},
    #     ])]