Update El Correo

This commit is contained in:
Kovid Goyal 2016-07-29 10:55:21 +05:30
parent a585b65f3b
commit 25e60aa62a

View File

@ -3,8 +3,8 @@ __license__ = 'GPL v3'
__copyright__ = '08 Januery 2011, desUBIKado'
__author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Biscay'
__version__ = 'v0.11'
__date__ = '26, July 2014'
__version__ = 'v0.13'
__date__ = '28, July 2016'
'''
http://www.elcorreo.com/
'''
@ -58,21 +58,25 @@ class elcorreo(BasicNewsRecipe):
]
keep_only_tags = [
dict(name='ul', attrs={'class':['media-list']})
dict(name='article', attrs={'class':['story media-list ']})
]
remove_tags = [
dict(name='span', attrs={'class':['no-comments']})
dict(name='span', attrs={'class':['no-comments']}),
dict(name='div', attrs={'class':['compApoyosText compNoticiasR']})
]
remove_tags_before = dict(name='ul' , attrs={'class':'media-list'})
remove_tags_after = dict(name='ul' , attrs={'class':'media-list'})
remove_tags_before = dict(name='article' , attrs={'class':'story media-list '})
remove_tags_after = dict(name='article' , attrs={'class':'story media-list '})
# Use the mobile version of the site
def print_version(self, url):
    """Return the mobile-site URL for an article URL.

    Only a leading ``http://www.`` is rewritten to ``http://m.``. Any
    later occurrence of that text (for example another URL carried in the
    query string) is left untouched, and URLs that do not start with the
    desktop prefix are returned unchanged.

    :param url: article URL as a string.
    :return: the URL with the desktop host prefix swapped for the mobile one.
    """
    desktop_prefix = 'http://www.'
    if url.startswith(desktop_prefix):
        # Slice instead of str.replace so only the host prefix is touched.
        return 'http://m.' + url[len(desktop_prefix):]
    return url
_processed_links = []
def get_article_url(self, article):
@ -100,6 +104,7 @@ class elcorreo(BasicNewsRecipe):
if parte[2] == 'alaves.elcorreo.com':
link = 'http://m.elcorreo.com/' + parte[3] + '/' + parte[4] + '/' + parte[5] + '/' + parte[6] + '?external=deportes/alaves'
# A veces el mismo artículo aparece en la versión de Álava y en la de Bizkaia. Por ejemplo:
# http://www.elcorreo.com/alava/deportes/motor/formula-1/201407/27/ecclestone-quiere-briatore-ayude-20140727140820-rc.html
# http://www.elcorreo.com/bizkaia/deportes/motor/formula-1/201407/27/ecclestone-quiere-briatore-ayude-20140727140820-rc.html
@ -108,6 +113,7 @@ class elcorreo(BasicNewsRecipe):
if ((parte[3] == 'alava') and (parte[4] != 'araba')):
link = link.replace('elcorreo.com/alava', 'elcorreo.com/bizkaia')
# Controlamos si el artículo ha sido incluido en otro feed para eliminarlo
if not (link in self._processed_links):
@ -115,6 +121,7 @@ class elcorreo(BasicNewsRecipe):
else:
link = None
return link
# Recuperamos la portada de papel (la imagen format=1 tiene mayor resolución)