diff --git a/recipes/noticias_r7.recipe b/recipes/noticias_r7.recipe new file mode 100644 index 0000000000..b7495bb77e --- /dev/null +++ b/recipes/noticias_r7.recipe @@ -0,0 +1,40 @@ +import re +from calibre.web.feeds.news import BasicNewsRecipe + +class PortalR7(BasicNewsRecipe): + title = 'Noticias R7' + __author__ = 'Diniz Bortolotto' + description = 'Noticias Portal R7' + oldest_article = 2 + max_articles_per_feed = 20 + encoding = 'utf8' + publisher = 'Rede Record' + category = 'news, Brazil' + language = 'pt_BR' + publication_type = 'newsportal' + use_embedded_content = False + no_stylesheets = True + remove_javascript = True + remove_attributes = ['style'] + + feeds = [ + (u'Brasil', u'http://www.r7.com/data/rss/brasil.xml'), + (u'Economia', u'http://www.r7.com/data/rss/economia.xml'), + (u'Internacional', u'http://www.r7.com/data/rss/internacional.xml'), + (u'Tecnologia e Ci\xeancia', u'http://www.r7.com/data/rss/tecnologiaCiencia.xml') + ] + reverse_article_order = True + + keep_only_tags = [dict(name='div', attrs={'class':'materia'})] + remove_tags = [ + dict(id=['espalhe', 'report-erro']), + dict(name='ul', attrs={'class':'controles'}), + dict(name='ul', attrs={'class':'relacionados'}), + dict(name='div', attrs={'class':'materia_banner'}), + dict(name='div', attrs={'class':'materia_controles'}) + ] + + preprocess_regexps = [ + (re.compile(r'