# calibre/recipes/consumerist.recipe

__license__   = 'GPL v3'
__copyright__ = '2010, NA'
'''
consumerist.com
'''

from calibre.web.feeds.news import BasicNewsRecipe

class Consumerist(BasicNewsRecipe):
    '''
    News recipe for consumerist.com.

    Articles are taken from the single feed listed in ``feeds``; each page
    is trimmed using the ``remove_tags*`` rules below and then passed
    through ``adeify_images`` in ``preprocess_html``.
    '''

    # --- Publication metadata -------------------------------------------
    __author__  = 'NA'
    title       = 'Consumerist'
    publisher   = 'consumerist.com'
    description = "Consumerist, Shoppers Bite Back."
    category    = 'news, consumer news, consumer rights'
    language    = 'en'
    masthead_url = 'http://consumerist.com/css/images/footer_man.gif'

    # --- Feed source and download limits --------------------------------
    feeds = [(u'Articles', u'http://consumerist.com/index.xml')]
    max_articles_per_feed = 100
    oldest_article        = 2
    use_embedded_content  = False
    encoding              = 'utf-8'

    # --- Styling ---------------------------------------------------------
    # The site's own stylesheets are dropped; only the rules below are applied.
    no_stylesheets = True
    extra_css      = '''
	body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}
	img{margin-bottom: 1em}
	h1{font-family :Arial,Helvetica,sans-serif; font-size:x-large}
	h2{font-family :Arial,Helvetica,sans-serif; font-size:large}
	              '''

    # --- Metadata forwarded to the conversion step ----------------------
    # Built from the attributes above, so it must come after them.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # --- Page clean-up rules ----------------------------------------------
    remove_attributes = ['width', 'height']

    # Window of the page to keep: from the first <h2> through the element
    # whose class is 'e-body'.
    remove_tags_before = {'name': 'h2'}
    remove_tags_after  = {'attrs': {'class': 'e-body'}}

    # Blocks stripped from inside the kept window.
    # Disabled alternatives that were tried earlier and left switched off:
    #   keep_only_tags on classes '' / 'category-breadcrumb'; removing
    #   <iframe>; <div id="IEContainer"> / <div id="clickIncludeBox">;
    #   <ul class="article-tools"> / <ul class="articleTools">.
    remove_tags = [
        {
            'name': 'div',
            'attrs': {'class': ['e-comments', 'more-about', 'entry-tags']},
        },
    ]

    def preprocess_html(self, soup):
        '''Pass the parsed page through ``adeify_images`` and return the result.'''
        adeified = self.adeify_images(soup)
        return adeified