__license__ = 'GPL v3'
__copyright__ = '2010, NA'
''' consumerist.com '''

from calibre.web.feeds.news import BasicNewsRecipe


class Consumerist(BasicNewsRecipe):
    """Recipe configuration for fetching articles from consumerist.com."""

    # --- Publication metadata ------------------------------------------
    title = 'Consumerist'
    __author__ = 'NA'
    description = "Consumerist, Shoppers Bite Back."
    publisher = 'consumerist.com'
    category = 'news, consumer news, consumer rights'
    language = 'en'
    masthead_url = 'http://consumerist.com/css/images/footer_man.gif'

    # --- Fetch settings ------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    encoding = 'utf-8'
    use_embedded_content = False
    feeds = [(u'Articles', u'http://consumerist.com/index.xml')]

    # --- Styling -------------------------------------------------------
    no_stylesheets = True
    # Adjacent literals are concatenated into one single-line stylesheet.
    extra_css = (
        ' body{font-family: "Lucida Grande",Helvetica,Arial,sans-serif}'
        ' img{margin-bottom: 1em}'
        ' h1{font-family :Arial,Helvetica,sans-serif; font-size:x-large}'
        ' h2{font-family :Arial,Helvetica,sans-serif; font-size:large} '
    )

    # Conversion metadata reuses the attribute values declared above.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # --- Content clean-up rules ----------------------------------------
    remove_attributes = ['width', 'height']

    # Disabled selectors, kept for reference:
    #   keep_only_tags = [dict(attrs={'class':['', 'category-breadcrumb']}),]
    #   remove_tags entries:
    #     dict(name='iframe')
    #     dict(name='div', attrs={'id':['IEContainer', 'clickIncludeBox']})
    #     dict(name='ul', attrs={'class':'article-tools'})
    #     dict(name='ul', attrs={'class':'articleTools'})
    remove_tags_before = {'name': 'h2'}
    remove_tags = [
        {
            'name': 'div',
            'attrs': {'class': ['e-comments', 'more-about', 'entry-tags']},
        },
    ]
    remove_tags_after = {'attrs': {'class': 'e-body'}}

    def preprocess_html(self, soup):
        """Return ``soup`` after passing it through ``self.adeify_images``.

        ``adeify_images`` is not defined in this class, so it is expected to
        be inherited from ``BasicNewsRecipe``.
        """
        adjusted_soup = self.adeify_images(soup)
        return adjusted_soup