#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal

from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe


def classes(classes):
    """Build a tag-matching spec that selects elements by CSS class.

    ``classes`` is a string of class names separated by single spaces.
    The returned dict has one ``attrs`` entry mapping ``'class'`` to a
    predicate. The predicate is given an element's class attribute value
    and returns a truthy result when that whitespace-separated value
    shares at least one name with ``classes``; a missing or empty value
    yields a falsy result.
    """
    q = frozenset(classes.split(' '))

    def shares_a_class(x):
        # Short-circuit on a missing/empty attribute before splitting it.
        return x and frozenset(x.split()).intersection(q)

    return dict(attrs={'class': shares_a_class})


class DenverPost(BasicNewsRecipe):
    """Recipe that downloads Denver Post articles from its RSS feeds."""

    title = u'Denver Post'
    language = 'en'
    __author__ = 'Kovid Goyal'

    oldest_article = 1  # days
    max_articles_per_feed = 20
    no_stylesheets = True
    use_embedded_content = False

    # Parts of each article page to retain: the headline plus the
    # elements carrying any of the listed classes.
    keep_only_tags = [
        dict(name='h1'),
        classes('subheadline byline header-features article-body'),
    ]

    # Elements dropped from the page: "related" blocks and anything
    # carrying a data-config-video-id attribute.
    remove_tags = [
        classes('related'),
        dict(attrs={'data-config-video-id': True}),
    ]

    # (section title, feed URL) pairs.
    feeds = [
        ('Top Stories', 'http://feeds.denverpost.com/dp-news-topstories'),
        ('Business', 'http://feeds.denverpost.com/dp-business'),
        ('Sports', 'http://feeds.denverpost.com/dp-sports'),
        ('Lifestyles', 'http://feeds.denverpost.com/dp-lifestyles'),
        ('Politics', 'http://feeds.denverpost.com/dp-politics'),
        ('Entertainment', 'http://feeds.denverpost.com/dp-entertainment'),
    ]

    def preprocess_html(self, soup):
        """Point each ``data-src`` image at its real URL and return the soup.

        Every ``<img>`` that has a ``data-src`` attribute gets that value
        copied into ``src``, and its ``height`` and ``width`` attributes
        are removed. The same ``soup`` object is modified in place and
        returned.
        """
        # Presumably these are lazy-loaded images whose real URL lives in
        # data-src -- verify against the site's current markup.
        deferred_images = soup.findAll('img', attrs={'data-src': True})
        for image in deferred_images:
            image['src'] = image['data-src']
            for size_attr in ('height', 'width'):
                del image[size_attr]
        return soup