diff --git a/recipes/el_correo.recipe b/recipes/el_correo.recipe
index 7fa3f54b9f..2c1e603ccf 100644
--- a/recipes/el_correo.recipe
+++ b/recipes/el_correo.recipe
@@ -1,184 +1,115 @@
-#!/usr/bin/env python
-__license__ = 'GPL v3'
-__copyright__ = '08 Januery 2011, desUBIKado'
-__author__ = 'desUBIKado'
-__description__ = 'Daily newspaper from Biscay'
-__version__ = 'v0.14'
-__date__ = '10, September 2017'
'''
http://www.elcorreo.com/
'''
-import re
-import time
-
-from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.web.feeds.news import BasicNewsRecipe, classes
class elcorreo(BasicNewsRecipe):
- __author__ = 'desUBIKado'
- description = 'Daily newspaper from Biscay'
- title = u'El Correo'
- publisher = 'Vocento'
- category = 'News, politics, culture, economy, general interest'
- oldest_article = 1
- delay = 1
- max_articles_per_feed = 100
- no_stylesheets = True
- use_embedded_content = False
- masthead_url = 'http://www.elcorreo.com/vizcaya/noticias/201002/02/Media/logo-elcorreo-nuevo.png'
+ title = 'El Correo'
+ __author__ = 'unkn0wn'
+ description = 'Daily newspaper in Bilbao and the Basque Country of northern Spain'
+ oldest_article = 1 # days
 language = 'es'
- timefmt = '[%a, %d %b, %Y]'
+ no_stylesheets = True
+ remove_attributes = ['height', 'width', 'style']
+ # Duplicate links are handled by ignore_duplicate_articles; the hand-kept _processed_links list is gone.
+ ignore_duplicate_articles = {'url'}
+ masthead_url = 'https://s1.ppllstatics.com/starfish/1.3.76/assets/images/logos/logo-elcorreo.svg'
 encoding = 'utf-8'
 remove_empty_feeds = True
- remove_javascript = True
+ resolve_internal_links = True
- feeds = [
- (u'Portada', u'http://www.elcorreo.com/rss/atom/portada'),
- (u'Mundo', u'http://www.elcorreo.com/rss/atom/?section=internacional'),
- (u'Bizkaia', u'http://www.elcorreo.com/rss/atom/?section=bizkaia'),
- (u'Guipuzkoa', u'http://www.elcorreo.com/rss/atom/?section=gipuzkoa'),
- (u'Araba', u'http://www.elcorreo.com/rss/atom/?section=araba'),
- (u'La Rioja', u'http://www.elcorreo.com/rss/atom/?section=larioja'),
- (u'Miranda', u'http://www.elcorreo.com/rss/atom/?section=miranda'),
- (u'Economía', u'http://www.elcorreo.com/rss/atom/?section=economia'),
- (u'Culturas', u'http://www.elcorreo.com/rss/atom/?section=culturas'),
- (u'Politica', u'http://www.elcorreo.com/rss/atom/?section=politica'),
- (u'Tecnología', u'http://www.elcorreo.com/rss/atom/?section=tecnologia'),
- (u'Gente - Estilo', u'http://www.elcorreo.com/rss/atom/?section=gente-estilo'),
- (u'Planes', u'http://www.elcorreo.com/rss/atom/?section=planes'),
- (u'Athletic', u'http://www.elcorreo.com/rss/atom/?section=athletic'),
- (u'Alavés', u'http://www.elcorreo.com/rss/atom/?section=alaves'),
- (u'Bilbao Basket', u'http://www.elcorreo.com/rss/atom/?section=bilbaobasket'),
- (u'Baskonia', u'http://www.elcorreo.com/rss/atom/?section=baskonia'),
- (u'Deportes', u'http://www.elcorreo.com/rss/atom/?section=deportes'),
- (u'Jaiak', u'http://www.elcorreo.com/rss/atom/?section=jaiak'),
- (u'La Blanca', u'http://www.elcorreo.com/rss/atom/?section=la-blanca-vitoria'),
- (u'Aste Nagusia', u'http://www.elcorreo.com/rss/atom/?section=aste-nagusia-bilbao'),
- (u'Semana Santa', u'http://www.elcorreo.com/rss/atom/?section=semana-santa'),
- (u'Festivales', u'http://www.elcorreo.com/rss/atom/?section=festivales')
- ]
+ extra_css = '''
+ .v-mdl-ath__inf, .v-mdl-ath__p--2, .v-mdl-ath__p {font-size:small; color:#404040;}
+ .v-fc, .v-a-fig { text-align:center; font-size:small; }
+ #sub { font-style:italic; color:#202020; }
+ blockquote, em { color:#202020; }
+ img { display:block; margin:0 auto; }
+ '''  # CSS rules: small grey text for the .v-mdl-ath* classes, centred small text for .v-fc/.v-a-fig, italic #sub, centred block images
+
+ def get_cover_url(self):  # cover image URL is derived from today's local date as YYYY/MM/DD
+     from datetime import date
+     return 'https://portada.iperiodico.es/{:%Y/%m/%d}_elcorreo.750.jpg'.format(date.today())
 keep_only_tags = [
- dict(name='div', attrs={'class': ['col-xs-12 col-sm-12 col-md-8 col-lg-8']})
+ dict(name='article', attrs={'class': lambda x: x and set(x.split()).intersection(
+ {'v-a--d-bs', 'v-a--d-opn', 'v-a--d-rpg'})}),  # truthy when the class attribute shares at least one whitespace-separated token with this set
+ classes(  # NOTE(review): `classes` is imported from calibre.web.feeds.news; presumably matches elements by these space-separated class names - confirm
+ 'v-d--ab-c v-d--rpg'
+ )
 ]
remove_tags = [
- dict(
- name='div',
- attrs={
- 'class': [
- 'voc-topics voc-detail-grid ', 'voc-newsletter ',
- 'voc-author-social'
- ]
- }
- ),
- dict(name='section', attrs={'class': ['voc-ficha-detail voc-file-sports']})
- ]
-
- remove_tags_before = dict(
- name='div', attrs={'class': 'col-xs-12 col-sm-12 col-md-8 col-lg-8'}
- )
- remove_tags_after = dict(
- name='div', attrs={'class': 'col-xs-12 col-sm-12 col-md-8 col-lg-8'}
- )
-
- _processed_links = []
-
- def get_article_url(self, article):
- link = article.get('link', None)
-
- if link is None:
- return article
-
- # modificamos la url de las noticias de los equipos deportivos para que funcionen, por ejemplo:
- # http://athletic.elcorreo.com/noticias/201407/27/muniain-estrella-athletic-para-20140727093046.html
- # http://m.elcorreo.com/noticias/201407/27/muniain-estrella-athletic-para-20140727093046.html?external=deportes/athletic
-
- parte = link.split('/')
-
- if parte[2] == 'athletic.elcorreo.com':
- link = 'http://www.elcorreo.com/' + parte[3] + '/' + parte[
- 4
- ] + '/' + parte[5] + '/' + parte[6] + '?external=deportes/athletic'
- else:
- if parte[2] == 'baskonia.elcorreo.com':
- link = 'http://www.elcorreo.com/' + parte[3] + '/' + parte[
- 4
- ] + '/' + parte[5] + '/' + parte[6] + '?external=deportes/baskonia'
- else:
- if parte[2] == 'bilbaobasket.elcorreo.com':
- link = 'http://www.elcorreo.com/' + parte[3] + '/' + parte[
- 4
- ] + '/' + parte[5] + '/' + parte[
- 6
- ] + '?external=deportes/bilbaobasket'
- else:
- if parte[2] == 'alaves.elcorreo.com':
- link = 'http://www.elcorreo.com/' + parte[3] + '/' + parte[
- 4
- ] + '/' + parte[5] + '/' + parte[
- 6
- ] + '?external=deportes/alaves'
-
- # A veces el mismo articulo aparece en la versión de Alava y en la de Bizkaia. Por ejemplo:
- # http://www.elcorreo.com/alava/deportes/motor/formula-1/201407/27/ecclestone-quiere-briatore-ayude-20140727140820-rc.html
- # http://www.elcorreo.com/bizkaia/deportes/motor/formula-1/201407/27/ecclestone-quiere-briatore-ayude-20140727140820-rc.html
- # para controlar los duplicados, unificamos las url para que sean siempre de bizkaia (excepto para la sección "araba")
-
- if ((parte[3] == 'alava') and (parte[4] != 'araba')):
- link = link.replace('elcorreo.com/alava', 'elcorreo.com/bizkaia')
-
- # Controlamos si el artículo ha sido incluido en otro feed para eliminarlo
-
- if link not in self._processed_links:
- self._processed_links.append(link)
- else:
- link = None
-
- return link
-
- # Recuperamos la portada de papel (la imagen format=1 tiene mayor resolucion)
-
- def get_cover_url(self):
- cover = None
- st = time.localtime()
- year = str(st.tm_year)
- month = "%.2d" % st.tm_mon
- day = "%.2d" % st.tm_mday
- # http://info.elcorreo.com/pdf/07082013-viz.pdf
- cover = 'http://info.elcorreo.com/pdf/' + day + month + year + '-viz.pdf'
- br = BasicNewsRecipe.get_browser(self)
- try:
- br.open(cover)
- except:
- self.log("\nPortada no disponible")
- cover = 'http://www.elcorreo.com/vizcaya/noticias/201002/02/Media/logo-elcorreo-nuevo.png'
- return cover
-
- # Para cambiar el estilo del texto
-
- extra_css = '''
- h1 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:28px;}
- h2 {font-family:georgia,serif; font-style:italic; font-weight:normal;font-size:16px;color:#4D4D4D;}
- h3 {font-family:georgia,serif; font-weight:bold;font-size:18px;}
- '''
-
- preprocess_regexps = [
-
- # Para presentar la imagen de los video incrustados
- (
- re.compile(r'stillURLVideo: \'', re.DOTALL | re.IGNORECASE),
- lambda match: ',
- (
- re.compile(r'.jpg\',', re.DOTALL | re.IGNORECASE),
- lambda match: '.jpg)