#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2009, Darko Miletic '
'''
oglobo.globo.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class OGlobo(BasicNewsRecipe):
    """Calibre news recipe for the O Globo site (oglobo.globo.com).

    Articles come from the site's RSS feeds listed in ``feeds``; each
    article URL is rewritten to its print variant by ``print_version`` and
    inline ``style`` attributes are stripped in ``preprocess_html``.
    """

    # --- Publication metadata -------------------------------------------
    title = 'O Globo'
    __author__ = 'Darko Miletic and Carlos Laviola'
    description = 'News from Brasil'
    publisher = 'O Globo'
    category = 'news, politics, Brasil'
    language = 'pt_BR'
    cover_url = 'http://oglobo.globo.com/_img/o-globo.png'

    # --- Download behaviour (see calibre's BasicNewsRecipe API) ---------
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    remove_javascript = True

    # --- Output options, built from the metadata defined above ----------
    html2lrf_options = [
        '--comment', description,
        '--category', category,
        '--publisher', publisher,
    ]

    html2epub_options = (
        'publisher="' + publisher
        + '"\ncomments="' + description
        + '"\ntags="' + category + '"'
    )

    # Stylesheet supplied by the recipe itself.
    extra_css = '''
        cite{color:#007BB5; font-size:xx-small; font-style:italic;}
        body{font-family:Arial,Helvetica,sans-serif;font-size:x-small;}
        h3{font-size:large; color:#082963; font-weight:bold;}
        #ident{color:#0179B4; font-size:xx-small;}
        p{color:#000000;font-weight:normal;}
        .commentario p{color:#007BB5; font-style:italic;}
    '''

    # Tag specifications for page elements that are not article content.
    remove_tags = [
        dict(name='script'),
        dict(name='form'),
        dict(name='div', attrs={'id': 'header'}),
        dict(name='p', attrs={'id': 'info-date-press'}),
    ]

    # (feed title, feed URL) pairs, one per site section.
    feeds = [
        (u'Todos os canais', u'http://oglobo.globo.com/rss.xml?completo=true'),
        (u'Ciencia', u'http://oglobo.globo.com/rss.xml?secao=ciencia&completo=true'),
        (u'Educacao', u'http://oglobo.globo.com/rss.xml?secao=educacao&completo=true'),
        (u'Opiniao', u'http://oglobo.globo.com/rss.xml?secao=opiniao&completo=true'),
        (u'Cultura', u'http://oglobo.globo.com/rss.xml?secao=cultura&completo=true'),
        (u'Esportes', u'http://oglobo.globo.com/rss.xml?secao=esportes&completo=true'),
        (u'Mundo', u'http://oglobo.globo.com/rss.xml?secao=mundo&completo=true'),
        (u'Pais', u'http://oglobo.globo.com/rss.xml?secao=pais&completo=true'),
        (u'Rio', u'http://oglobo.globo.com/rss.xml?secao=rio&completo=true'),
        (u'Saude', u'http://oglobo.globo.com/rss.xml?secao=saude&completo=true'),
        (u'Economia', u'http://oglobo.globo.com/rss.xml?secao=economia&completo=true'),
        (u'Tecnologia', u'http://oglobo.globo.com/rss.xml?secao=tecnologia&completo=true'),
    ]

    def print_version(self, url):
        """Return ``url`` with the ``service=print`` query parameter added.

        For a URL with neither a query string nor a fragment the result is
        ``url + '?service=print'``. If the URL already carries a query
        string the parameter is joined with ``&`` rather than a second
        ``?``, and it is placed before any ``#fragment`` so that it stays
        part of the query.

        :param url: Article URL as a string.
        :return: The URL of the print variant of the article.
        """
        address, hash_mark, fragment = url.partition('#')
        joiner = '&' if '?' in address else '?'
        return address + joiner + 'service=print' + hash_mark + fragment

    def preprocess_html(self, soup):
        """Remove every inline ``style`` attribute from the parsed page.

        :param soup: Parsed document exposing ``findAll`` and item-style
            attribute access on its tags.
        :return: The same ``soup`` object, modified in place.
        """
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return soup