#!/usr/bin/env python

__author__ = u'Jordi Balcells'
__license__ = 'GPL v3'
description = u'Jornal portugu\xeas - v1.03 (16 June 2010)'
__docformat__ = 'restructuredtext en'

''' publico.pt '''

from calibre.web.feeds.news import BasicNewsRecipe


def classes(classes):
    """Build a tag-matching spec keyed on the ``class`` attribute.

    ``classes`` is a string of class names separated by single spaces.
    The returned dict has the shape ``{'attrs': {'class': predicate}}``,
    where ``predicate(value)`` is truthy when ``value`` (a whitespace
    separated class string) shares at least one name with ``classes``.
    A falsy ``value`` (e.g. ``None`` or ``''``) is returned unchanged.
    """
    wanted = frozenset(classes.split(' '))

    def shares_a_class(value):
        # Tags without a class attribute yield a falsy value; hand it back
        # as-is so the predicate result stays falsy.
        if not value:
            return value
        return wanted.intersection(value.split())

    return {'attrs': {'class': shares_a_class}}


class PublicoPT(BasicNewsRecipe):
    """Recipe for the Portuguese newspaper publico.pt, fed from FeedBurner."""

    # --- Publication metadata ---
    title = u'Publico.PT'
    description = u'Jornal portugu\xeas'
    category = 'News, politics, culture, economy, general interest'
    language = 'pt'
    cover_url = 'http://static.publico.pt/files/header/img/publico.gif'

    # --- Fetch / conversion settings ---
    oldest_article = 2
    encoding = 'utf8'
    use_embedded_content = False
    remove_empty_feeds = True
    no_stylesheets = True
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '

    # --- Page clean-up rules ---
    # Keep only the elements whose id is one of these two values.
    keep_only_tags = [
        {'id': ['story-content', 'story-header']},
    ]
    # Drop anything carrying the "social-tools" class.
    remove_tags = [
        classes('social-tools'),
    ]
    remove_attributes = ['style']

    # --- Sections: (section title, feed URL) ---
    feeds = [
        (u'Geral', u'http://feeds.feedburner.com/publicoRSS'),
        (u'Mundo', u'http://feeds.feedburner.com/PublicoMundo'),
        (u'Pol\xedtica', u'http://feeds.feedburner.com/PublicoPolitica'),
        (u'Economia', u'http://feeds.feedburner.com/PublicoEconomia'),
        (u'Desporto', u'http://feeds.feedburner.com/PublicoDesporto'),
        (u'Sociedade', u'http://feeds.feedburner.com/PublicoSociedade'),
        (u'Educa\xe7\xe3o', u'http://feeds.feedburner.com/PublicoEducacao'),
        (u'Ci\xeancias', u'http://feeds.feedburner.com/PublicoCiencias'),
        (u'Ecosfera', u'http://feeds.feedburner.com/PublicoEcosfera'),
        (u'Cultura', u'http://feeds.feedburner.com/PublicoCultura'),
        (u'Local', u'http://feeds.feedburner.com/PublicoLocal'),
        (u'Tecnologia', u'http://feeds.feedburner.com/PublicoTecnologia'),
    ]

    def preprocess_html(self, soup):
        """Copy each image's ``data-media-viewer`` value into its ``src``.

        Only ``<img>`` tags that actually carry a ``data-media-viewer``
        attribute are touched. The same ``soup`` object is modified in
        place and returned.
        """
        # NOTE(review): presumably data-media-viewer holds the real image
        # URL while src is a placeholder -- confirm against the live markup.
        viewer_attr = 'data-media-viewer'
        tagged_images = soup.findAll('img', attrs={viewer_attr: True})
        for image in tagged_images:
            image['src'] = image[viewer_attr]
        return soup