diff --git a/recipes/publico.recipe b/recipes/publico.recipe
index e64cb17883..0361dbbad8 100644
--- a/recipes/publico.recipe
+++ b/recipes/publico.recipe
@@ -11,6 +11,12 @@ publico.pt
 from calibre.web.feeds.news import BasicNewsRecipe
 
 
+def classes(classes):
+    q = frozenset(classes.split(' '))
+    return dict(attrs={
+        'class': lambda x: x and frozenset(x.split()).intersection(q)})
+
+
 class PublicoPT(BasicNewsRecipe):
     description = u'Jornal portugu\xeas'
     cover_url = 'http://static.publico.pt/files/header/img/publico.gif'
@@ -24,9 +30,13 @@ class PublicoPT(BasicNewsRecipe):
     remove_empty_feeds = True
     extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '
 
-    keep_only_tags = [dict(attrs={'class': ['hentry article single']})]
-    remove_tags = [dict(attrs={'class': ['entry-options entry-options-above group',
-                                         'entry-options entry-options-below group', 'module tag-list']})]
+    keep_only_tags = [
+        dict(id='story-content story-header'.split()),
+    ]
+    remove_tags = [
+        classes('social-tools'),
+    ]
+    remove_attributes = ['style']
 
     feeds = [
         (u'Geral', u'http://feeds.feedburner.com/publicoRSS'),
@@ -42,3 +52,8 @@ class PublicoPT(BasicNewsRecipe):
         (u'Local', u'http://feeds.feedburner.com/PublicoLocal'),
         (u'Tecnologia', u'http://feeds.feedburner.com/PublicoTecnologia')
     ]
+
+    def preprocess_html(self, soup):
+        for img in soup.findAll('img', attrs={'data-media-viewer':True}):
+            img['src'] = img['data-media-viewer']
+        return soup