From b87d3eb24882beb16cf2e33bb736884a8f6aa491 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 27 Dec 2010 10:17:27 -0700
Subject: [PATCH] Updated Heraldo de Aragon

---
 resources/recipes/heraldo.recipe | 73 +++++++++++++++++++-------------
 1 file changed, 44 insertions(+), 29 deletions(-)

diff --git a/resources/recipes/heraldo.recipe b/resources/recipes/heraldo.recipe
index 381e97b9ce..c5669e116b 100644
--- a/resources/recipes/heraldo.recipe
+++ b/resources/recipes/heraldo.recipe
@@ -1,50 +1,65 @@
 #!/usr/bin/env python
-__license__ = 'GPL v3'
-__author__ = 'Lorenzo Vigentini'
-__copyright__ = '2009, Lorenzo Vigentini '
+__license__ = 'GPL v3'
+__copyright__ = '04 December 2010, desUBIKado'
+__author__ = 'desUBIKado'
 __description__ = 'Daily newspaper from Aragon'
-__version__ = 'v1.01'
-__date__ = '30, January 2010'
-
+__version__ = 'v0.03'
+__date__ = '11, December 2010'
 '''
-http://www.heraldo.es/
+[url]http://www.heraldo.es/[/url]
 '''
 
+import time
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class heraldo(BasicNewsRecipe):
-    author = 'Lorenzo Vigentini'
+    __author__ = 'desUBIKado'
     description = 'Daily newspaper from Aragon'
-
-    cover_url = 'http://www.heraldo.es/MODULOS/global/publico/interfaces/img/logo.gif'
     title = u'Heraldo de Aragon'
     publisher = 'OJD Nielsen'
     category = 'News, politics, culture, economy, general interest'
-
     language = 'es'
     timefmt = '[%a, %d %b, %Y]'
-
     oldest_article = 1
-    max_articles_per_feed = 25
-
+    max_articles_per_feed = 100
     use_embedded_content = False
-    recursion = 10
-
     remove_javascript = True
     no_stylesheets = True
-
-    keep_only_tags = [
-        dict(name='div', attrs={'class':['titularNoticiaNN','textoGrisVerdanaContenidos']})
-    ]
+    recursion = 10
 
     feeds = [
-        (u'Portadas ', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss')
-    ]
+        (u'Portadas', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss')
+    ]
+
+
+
+    keep_only_tags = [dict(name='div', attrs={'id':['dts','com']})]
+
+    remove_tags = [dict(name='a', attrs={'class':['com flo-r','enl-if','enl-df']}),
+                   dict(name='div', attrs={'class':['brb-b-s con marg-btt','cnt-rel con']}),
+                   dict(name='form', attrs={'class':'form'})]
+
+    remove_tags_before = dict(name='div' , attrs={'id':'dts'})
+    remove_tags_after = dict(name='div' , attrs={'id':'com'})
+
+    def get_cover_url(self):
+        cover = None
+        st = time.localtime()
+        year = str(st.tm_year)
+        month = "%.2d" % st.tm_mon
+        day = "%.2d" % st.tm_mday
+        #[url]http://oldorigin-www.heraldo.es/20101211/primeras/portada_aragon.pdf[/url]
+        cover='http://oldorigin-www.heraldo.es/'+ year + month + day +'/primeras/portada_aragon.pdf'
+        br = BasicNewsRecipe.get_browser()
+        try:
+            br.open(cover)
+        except:
+            self.log("\nPortada no disponible")
+            cover ='http://www.heraldo.es/MODULOS/global/publico/interfaces/img/logo-Heraldo.png'
+        return cover
+
+
+
     extra_css = '''
-        .articledate {color: gray;font-family: monospace;}
-        .articledescription {display: block;font-family: sans;font-size: 0.7em; text-indent: 0;}
-        .firma {color: #666;display: block;font-family: verdana, arial, helvetica;font-size: 1em;margin-bottom: 8px;}
-        .textoGrisVerdanaContenidos {color: #56595c;display: block;font-family: Verdana;font-size: 1.28571em;padding-bottom: 10px}
-        .titularNoticiaNN {display: block;padding-bottom: 10px;padding-left: 0;padding-right: 0;padding-top: 4px}
-        .titulo {color: #003066;font-family: Tahoma;font-size: 1.92857em;font-weight: bold;line-height: 1.2em}
-    '''
+        h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:xx-large;}
+    '''