Update publico.pt

This commit is contained in:
Kovid Goyal 2017-01-09 20:12:00 +05:30
parent fbabeb1f59
commit e6bd7ad712

View File

@ -11,6 +11,12 @@ publico.pt
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build a tag-matching spec that selects on CSS class names.

    ``classes`` is a whitespace-separated string of class names. The
    returned dict has a single ``'attrs'`` key whose ``'class'`` entry is
    a predicate over a tag's raw ``class`` attribute value. The predicate
    is truthy when that value shares at least one whitespace-separated
    token with the requested names, and falsy when the attribute is
    missing (``None``), empty, or has no token in common.

    In this recipe the result is placed in ``remove_tags``.
    """
    # Split on any run of whitespace so tabs or repeated spaces between
    # names do not produce unmatched or empty entries.
    wanted = frozenset(classes.split())

    def matches(value):
        # Keeps the original truthy/falsy contract: a falsy attribute value
        # is returned unchanged, otherwise the set of shared names.
        return value and frozenset(value.split()).intersection(wanted)

    return dict(attrs={'class': matches})
class PublicoPT(BasicNewsRecipe):
description = u'Jornal portugu\xeas'
cover_url = 'http://static.publico.pt/files/header/img/publico.gif'
@ -24,9 +30,13 @@ class PublicoPT(BasicNewsRecipe):
remove_empty_feeds = True
extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
keep_only_tags = [dict(attrs={'class': ['hentry article single']})]
remove_tags = [dict(attrs={'class': ['entry-options entry-options-above group',
'entry-options entry-options-below group', 'module tag-list']})]
keep_only_tags = [
dict(id='story-content story-header'.split()),
]
remove_tags = [
classes('social-tools'),
]
remove_attributes = ['style']
feeds = [
(u'Geral', u'http://feeds.feedburner.com/publicoRSS'),
@ -42,3 +52,8 @@ class PublicoPT(BasicNewsRecipe):
(u'Local', u'http://feeds.feedburner.com/PublicoLocal'),
(u'Tecnologia', u'http://feeds.feedburner.com/PublicoTecnologia')
]
def preprocess_html(self, soup):
    """Point each image at the URL held in its ``data-media-viewer`` attribute.

    Every ``<img>`` in ``soup`` that carries a ``data-media-viewer``
    attribute gets its ``src`` replaced by that attribute's value. The
    tree is modified in place and the same ``soup`` object is returned.

    NOTE(review): presumably ``data-media-viewer`` holds the real image
    URL while ``src`` is a placeholder or thumbnail; confirm against the
    site's markup.
    """
    for img in soup.findAll('img', attrs={'data-media-viewer': True}):
        viewer_src = img['data-media-viewer']
        # The attribute filter matches on presence only, so skip empty
        # values rather than overwriting a usable src with nothing.
        if viewer_src:
            img['src'] = viewer_src
    return soup