calibre/recipes/chicago_breaking_news.recipe

#!/usr/bin/env  python2

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic <darko.miletic at gmail.com>'
'''
chicagobreakingnews.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class ChicagoBreakingNews(BasicNewsRecipe):
    """Recipe for the Chicago Breaking News feed (chicagobreakingnews.com).

    Articles are taken from the content embedded in the feed
    (``use_embedded_content``); ``preprocess_html`` then drops ad links
    and inline presentational attributes from each article.
    """

    title = 'Chicago Breaking News'
    __author__ = 'Darko Miletic'
    description = 'Breaking News from Chicago'
    oldest_article = 1
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = True
    publisher = 'Chicago Breaking News'
    category = 'news, politics, USA, Chicago'
    encoding = 'utf8'
    language = 'en'

    # Metadata options assembled from the descriptive fields above.
    html2lrf_options = [
        '--comment', description, '--category', category, '--publisher', publisher
    ]

    html2epub_options = 'publisher="' + publisher + \
        '"\ncomments="' + description + '"\ntags="' + category + '"'

    feeds = [(u'Breaking news', u'http://feeds2.feedburner.com/ChicagoBreakingNews/')]

    def preprocess_html(self, soup):
        """Remove ad links and inline presentation attributes from *soup*.

        Every ``<a>`` whose ``href`` contains the Google feed-ads host is
        removed from the tree, and the ``style``, ``color`` and ``size``
        attributes are deleted from any tag carrying them.

        :param soup: parsed article markup (BeautifulSoup tree).
        :return: the same ``soup`` object, modified in place.
        """
        ad_host = 'http://feedads.googleadservices.com'
        for link in soup.findAll('a'):
            # Anchors without an href (e.g. named anchors) must not raise
            # KeyError; treat a missing attribute as an empty URL.
            href = link.get('href', '') or ''
            if ad_host in href:
                link.extract()

        # Strip presentational attributes, in the same order as before.
        for attr in ('style', 'color', 'size'):
            for tag in soup.findAll(attrs={attr: True}):
                del tag[attr]
        return soup