#!/usr/bin/env python
# Calibre news recipe for the San Jose Mercury News (www.mercurynews.com).
__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic '
''' www.mercurynews.com '''

from calibre.web.feeds.news import BasicNewsRecipe


def _has_header_features_class(class_value):
    """Tell whether a ``class`` attribute value names ``header-features``.

    A falsy value (attribute missing or empty) is handed back unchanged so
    the matcher treats it as "no match"; otherwise the value is split on
    whitespace and checked for the exact ``header-features`` token.
    """
    if not class_value:
        return class_value
    return 'header-features' in class_value.split()


class MercuryNews(BasicNewsRecipe):
    """Recipe that builds an e-book from the Mercury News RSS feed."""

    # Descriptive metadata for the generated publication.
    title = 'San Jose Mercury News'
    __author__ = 'Darko Miletic'
    description = 'News from San Jose'
    cover_url = 'http://webmedia.newseum.org/newseum-multimedia/dfp/jpg12/lg/CA_SJMN.jpg'
    publisher = 'San Jose Mercury News'
    category = 'news, politics, USA, San Jose, California'

    # Download limits and fetch behaviour (see the BasicNewsRecipe API docs
    # for the exact meaning of each setting).
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'en'

    # Conversion metadata mirrors the attributes declared above.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # Only these elements are kept from each article page: the <h1>
    # headline, anything whose class is byline/time/article-body, and
    # anything carrying a 'header-features' class token.
    keep_only_tags = [
        {'name': 'h1'},
        {'attrs': {'class': ['byline', 'time', 'article-body']}},
        {'attrs': {'class': _has_header_features_class}},
    ]

    feeds = [
        ('News', 'http://www.mercurynews.com/feed/'),
    ]

    def preprocess_html(self, soup, *a):
        """Copy each image's ``data-src`` attribute into its ``src``.

        Every ``<img>`` that carries a ``data-src`` attribute has that value
        written to ``src``; the same (mutated) soup is returned.
        NOTE(review): presumably the site defers image loading through
        ``data-src`` -- confirm against the live markup.
        """
        deferred_images = soup.findAll(name='img', attrs={'data-src': True})
        for image_tag in deferred_images:
            image_tag['src'] = image_tag['data-src']
        return soup