calibre/recipes/el_pais.recipe
2019-04-10 16:46:50 +05:30

71 lines
2.5 KiB
Python

#!/usr/bin/env python2
# Module-level metadata for the El Pais recipe file.
# Documentation markup used by the strings in this file.
__docformat__ = 'restructuredtext en'
# Licensing and attribution.
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__author__ = 'Jordi Balcells, based on an earlier version by Lorenzo Vigentini & Kovid Goyal'
# Versioned, module-level description; the recipe class below defines its own
# ``description`` attribute with a shorter text.
description = 'Main daily newspaper from Spain - v1.04 (19, October 2010)'
# Bare string naming the source site. It is not the first statement of the
# module, so it has no runtime effect and is not the module docstring.
'''
elpais.es
'''
from calibre.web.feeds.news import BasicNewsRecipe
class ElPais(BasicNewsRecipe):
    """News recipe for the Spanish daily El Pais, built from its RSS feeds.

    The class only declares configuration attributes consumed by
    ``BasicNewsRecipe`` plus one hook, ``preprocess_html``, which makes
    responsive images usable by giving them a plain ``src``.

    All text in this file is kept ASCII-only (non-ASCII characters are
    written as ``\\x`` escapes inside unicode literals) because the file
    carries no source-encoding declaration.
    """

    # --- Publication metadata -------------------------------------------
    __author__ = 'Kovid Goyal & Lorenzo Vigentini & Jordi Balcells'
    description = 'Main daily newspaper from Spain'
    title = u'El Pais'
    publisher = u'Ediciones El Pa\xeds SL'
    category = 'News, politics, culture, economy, general interest'
    language = 'es'
    # strftime-style pattern, e.g. "[Mon, 01 Jan, 2019]".
    timefmt = '[%a, %d %b, %Y]'

    # --- Download limits and content handling ---------------------------
    # NOTE(review): units/semantics of these settings are defined by the
    # calibre BasicNewsRecipe API, not by this file -- see its docs.
    oldest_article = 2
    max_articles_per_feed = 15
    use_embedded_content = False
    recursion = 5
    remove_javascript = True
    no_stylesheets = True

    # Only these parts of each article page are kept: the headline, the
    # schema.org-annotated body/image/caption, and two site-specific
    # CSS classes.
    # NOTE(review): the trailing space in 'articulo-apertura ' is preserved
    # verbatim from the original recipe; confirm whether it is intentional.
    keep_only_tags = [
        dict(name='h1'), dict(itemprop=['articleBody', 'image', 'caption']),
        dict(attrs={'class': ['articulo-subtitulos', 'articulo-apertura ']}),
    ]

    # (section title, RSS feed URL) pairs, in the order they are listed.
    feeds = [
        (u'Titulares de portada', u'http://www.elpais.com/rss/feed.html?feedId=1022'),
        (u'Internacional',
         u'http://www.elpais.com/rss/feed.html?feedId=1001'),
        (u'Espa\xf1a', u'http://www.elpais.com/rss/feed.html?feedId=1002'),
        (u'Deportes', u'http://www.elpais.com/rss/feed.html?feedId=1007'),
        (u'Econom\xeda',
         u'http://www.elpais.com/rss/feed.html?feedId=1006'),
        (u'Pol\xedtica',
         u'http://www.elpais.com/rss/feed.html?feedId=17073'),
        (u'Tecnolog\xeda',
         u'http://www.elpais.com/rss/feed.html?feedId=1005'),
        (u'Cultura', u'http://www.elpais.com/rss/feed.html?feedId=1008'),
        (u'Gente', u'http://www.elpais.com/rss/feed.html?feedId=1009'),
        (u'Sociedad', u'http://www.elpais.com/rss/feed.html?feedId=1004'),
        (u'Opini\xf3n', u'http://www.elpais.com/rss/feed.html?feedId=1003'),
        (u'Ciencia', u'http://www.elpais.com/rss/feed.html?feedId=17068'),
        (u'Justicia y leyes',
         u'http://www.elpais.com/rss/feed.html?feedId=17069'),
        (u'Medio ambiente',
         u'http://www.elpais.com/rss/feed.html?feedId=17071'),
        (u'Vi\xf1etas', u'http://www.elpais.com/rss/feed.html?feedId=17058')
    ]

    def preprocess_html(self, soup):
        """Give every ``<img>`` with a ``srcset`` a concrete ``src``.

        The ``src`` attribute is overwritten with the URL of the first
        candidate listed in ``srcset``. Images whose ``srcset`` contains no
        usable URL (empty, whitespace-only, or only commas) are left
        untouched.

        :param soup: parsed article document; must provide ``findAll``.
        :return: the same ``soup`` object, modified in place.
        """
        for img in soup.findAll('img', srcset=True):
            # srcset is a comma-separated list of "URL [descriptor]"
            # candidates. Splitting on whitespace leaves the separator comma
            # glued to a URL that has no descriptor ("a.jpg, b.jpg 2x"), and
            # a leading separator shows up as a token of its own, so commas
            # are trimmed and comma-only tokens skipped.
            for token in img['srcset'].split():
                url = token.strip(',')
                if url:
                    img['src'] = url
                    break
        return soup