Update Heraldo de Aragon

This commit is contained in:
Kovid Goyal 2014-08-18 19:27:16 +05:30
parent b0f52e2a4d
commit 38fa804e61

View File

@@ -3,8 +3,8 @@ __license__ = 'GPL v3'
__copyright__ = '04 December 2010, desUBIKado' __copyright__ = '04 December 2010, desUBIKado'
__author__ = 'desUBIKado' __author__ = 'desUBIKado'
__description__ = 'Daily newspaper from Aragon' __description__ = 'Daily newspaper from Aragon'
__version__ = 'v0.06' __version__ = 'v0.07'
__date__ = '01, December 2013' __date__ = '17, August 2014'
''' '''
http://www.heraldo.es/ http://www.heraldo.es/
''' '''
@@ -31,17 +31,19 @@ class heraldo(BasicNewsRecipe):
no_stylesheets = True no_stylesheets = True
feeds = [ feeds = [
(u'Portadas', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss') (u'Noticias', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss')
] ]
keep_only_tags = [dict(name='div', attrs={'class':['row-f2 brd-row-f4 bck-row-f1-f1 padd-t padd-btt con n-marg-btt']}), keep_only_tags = [dict(name='div', attrs={'class':['row-f2 brd-row-f4 bck-row-f1-f1 padd-t padd-btt con n-marg-btt']}),
dict(name='div', attrs={'id':['dts','com']})] dict(name='div', attrs={'id':['dts','com']}),
dict(name='img', attrs={'class':['lazy']})]
remove_tags = [dict(name='a', attrs={'class':['com flo-r','enl-if','enl-df','next_com']}), remove_tags = [dict(name='a', attrs={'class':['com flo-r','enl-if','enl-df','next_com']}),
dict(name='div', attrs={'class':['brb-b-s con marg-btt','cnt-rel con','col5-f1','tit txt-wh f-s con','con cont-top ']}), dict(name='div', attrs={'class':['brb-b-s con marg-btt','cnt-rel con','col5-f1','tit txt-wh f-s con',
dict(name='div', attrs={'id':['cont-Top-8760','caj-pub','8760-cpt1']}), 'con cont-top ','col5-f1 flo-l','cnt-rel brr','caj_part con','caj_topic con']}),
dict(name='div', attrs={'id':['cont-Top-8760','caj-pub','8760-cpt1','caj_topic con','slider-oferplan','cont-Top-']}),
dict(name='form', attrs={'class':'form'}), dict(name='form', attrs={'class':'form'}),
dict(name='ul', attrs={'class':['tabs-nav','lst-not-f2 con']}), dict(name='ul', attrs={'class':['tabs-nav','men_nav con hg_2n','lst-not-f2 con ']}),
dict(name='span', attrs={'class':['flo-r']}), dict(name='span', attrs={'class':['flo-r']}),
dict(name='ul', attrs={'id':['cont-tags','pag-1','pag-cnt-I-']})] dict(name='ul', attrs={'id':['cont-tags','pag-1','pag-cnt-I-']})]
@@ -65,16 +67,20 @@ class heraldo(BasicNewsRecipe):
return cover return cover
extra_css = ''' extra_css = '''
.con strong{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:16px;} h1 {font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:28px;}
.con h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;} h2 {font-family:georgia,serif; font-style:italic; font-weight:normal;font-size:22px;color:#4D4D4D;}
.con span{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:12px;} .ladillo {font-family:georgia,serif; font-weight:bold;font-size:18px;}
.ent {font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;} .firm, .sp, .fech, ".com flo-r" {font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:12px;}
img{margin-bottom: 0.4em} img{margin-bottom: 0.4em}
''' '''
preprocess_regexps = [ preprocess_regexps = [
# Para separar los comentarios con una linea en blanco # Para separar los comentarios con una linea en blanco
(re.compile(r'<div id="com"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div id="com"') (re.compile(r'<div class="tit-f2">', re.DOTALL|re.IGNORECASE), lambda match: '<br /><br /><div class="tit-f2">'),
(re.compile(r'<div id="com"', re.DOTALL|re.IGNORECASE), lambda match: '<br><div id="com"'),
# Para ver las imágenes de las noticias
(re.compile(r'<img class="lazy" data-original="', re.DOTALL|re.IGNORECASE), lambda match: '<img src="http://www.heraldo.es')
] ]