Update publico.pt

This commit is contained in:
Kovid Goyal 2017-01-09 20:12:00 +05:30
parent fbabeb1f59
commit e6bd7ad712

View File

@ -11,6 +11,12 @@ publico.pt
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build a tag matcher that selects elements by CSS class name.

    ``classes`` is a whitespace-separated string of class names. The
    result is a dict with a single ``attrs`` key that maps ``'class'`` to
    a predicate, in the form used by the recipe's ``remove_tags`` list.

    The predicate receives the value of an element's ``class`` attribute
    and returns the frozenset of wanted names present in it (empty, hence
    falsy, when none match). A falsy value such as ``None`` or ``''`` is
    returned unchanged.
    """
    # Split on any whitespace, exactly as the attribute value is split
    # below, so tabs, newlines or repeated spaces in the argument cannot
    # produce merged or empty names.
    wanted = frozenset(classes.split())

    def has_wanted_class(value):
        # Short-circuit on a falsy value so .split() is never called on it.
        return value and frozenset(value.split()).intersection(wanted)

    return dict(attrs={'class': has_wanted_class})
class PublicoPT(BasicNewsRecipe): class PublicoPT(BasicNewsRecipe):
description = u'Jornal portugu\xeas' description = u'Jornal portugu\xeas'
cover_url = 'http://static.publico.pt/files/header/img/publico.gif' cover_url = 'http://static.publico.pt/files/header/img/publico.gif'
@ -24,9 +30,13 @@ class PublicoPT(BasicNewsRecipe):
remove_empty_feeds = True remove_empty_feeds = True
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} ' extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
keep_only_tags = [dict(attrs={'class': ['hentry article single']})] keep_only_tags = [
remove_tags = [dict(attrs={'class': ['entry-options entry-options-above group', dict(id='story-content story-header'.split()),
'entry-options entry-options-below group', 'module tag-list']})] ]
remove_tags = [
classes('social-tools'),
]
remove_attributes = ['style']
feeds = [ feeds = [
(u'Geral', u'http://feeds.feedburner.com/publicoRSS'), (u'Geral', u'http://feeds.feedburner.com/publicoRSS'),
@ -42,3 +52,8 @@ class PublicoPT(BasicNewsRecipe):
(u'Local', u'http://feeds.feedburner.com/PublicoLocal'), (u'Local', u'http://feeds.feedburner.com/PublicoLocal'),
(u'Tecnologia', u'http://feeds.feedburner.com/PublicoTecnologia') (u'Tecnologia', u'http://feeds.feedburner.com/PublicoTecnologia')
] ]
def preprocess_html(self, soup):
    """Point images at the URL held in their ``data-media-viewer`` attribute.

    Every ``img`` element that carries a ``data-media-viewer`` attribute
    gets its ``src`` replaced by that attribute's value.
    NOTE(review): presumably the attribute holds the full-size image URL
    used by the site's media viewer -- confirm against the live markup.

    ``soup`` is the parsed document; it is modified in place and the same
    object is returned.
    """
    for img in soup.findAll('img', attrs={'data-media-viewer': True}):
        viewer_url = img['data-media-viewer']
        # An empty attribute would replace a usable ``src`` with an empty
        # string, so keep the original source in that case.
        if viewer_url:
            img['src'] = viewer_url
    return soup