import re from calibre.web.feeds.news import BasicNewsRecipe class PortalR7(BasicNewsRecipe): title = 'Noticias R7' __author__ = 'Diniz Bortolotto' description = 'Noticias Portal R7' oldest_article = 2 max_articles_per_feed = 20 encoding = 'utf8' publisher = 'Rede Record' category = 'news, Brazil' language = 'pt_BR' publication_type = 'newsportal' use_embedded_content = False no_stylesheets = True remove_javascript = True remove_attributes = ['style'] feeds = [ (u'Brasil', u'http://www.r7.com/data/rss/brasil.xml'), (u'Economia', u'http://www.r7.com/data/rss/economia.xml'), (u'Internacional', u'http://www.r7.com/data/rss/internacional.xml'), (u'Tecnologia e Ci\xeancia', u'http://www.r7.com/data/rss/tecnologiaCiencia.xml') ] reverse_article_order = True keep_only_tags = [dict(name='div', attrs={'class': 'materia'})] remove_tags = [ dict(id=['espalhe', 'report-erro']), dict(name='ul', attrs={'class': 'controles'}), dict(name='ul', attrs={'class': 'relacionados'}), dict(name='div', attrs={'class': 'materia_banner'}), dict(name='div', attrs={'class': 'materia_controles'}) ] preprocess_regexps = [ (re.compile(r'