From 9fff468d7415adc11f48c99fd6ffb3c6428a8577 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 9 Jan 2011 11:47:15 -0700 Subject: [PATCH] Update Heraldo de Aragon --- resources/recipes/heraldo.recipe | 58 +++++++++++++++++++------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/resources/recipes/heraldo.recipe b/resources/recipes/heraldo.recipe index c5669e116b..f3236ec4a9 100644 --- a/resources/recipes/heraldo.recipe +++ b/resources/recipes/heraldo.recipe @@ -3,29 +3,31 @@ __license__ = 'GPL v3' __copyright__ = '04 December 2010, desUBIKado' __author__ = 'desUBIKado' __description__ = 'Daily newspaper from Aragon' -__version__ = 'v0.03' -__date__ = '11, December 2010' +__version__ = 'v0.04' +__date__ = '6, Januery 2011' ''' [url]http://www.heraldo.es/[/url] ''' import time +import re from calibre.web.feeds.news import BasicNewsRecipe class heraldo(BasicNewsRecipe): - __author__ = 'desUBIKado' - description = 'Daily newspaper from Aragon' + __author__ = 'desUBIKado' + description = 'Daily newspaper from Aragon' title = u'Heraldo de Aragon' publisher = 'OJD Nielsen' category = 'News, politics, culture, economy, general interest' language = 'es' timefmt = '[%a, %d %b, %Y]' - oldest_article = 1 + oldest_article = 2 + delay = 1 max_articles_per_feed = 100 use_embedded_content = False remove_javascript = True no_stylesheets = True - recursion = 10 + feeds = [ (u'Portadas', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss') @@ -37,29 +39,39 @@ class heraldo(BasicNewsRecipe): remove_tags = [dict(name='a', attrs={'class':['com flo-r','enl-if','enl-df']}), dict(name='div', attrs={'class':['brb-b-s con marg-btt','cnt-rel con']}), - dict(name='form', attrs={'class':'form'})] + dict(name='form', attrs={'class':'form'}), + dict(name='ul', attrs={'id':['cont-tags','pag-1']})] remove_tags_before = dict(name='div' , attrs={'id':'dts'}) remove_tags_after = dict(name='div' , attrs={'id':'com'}) def get_cover_url(self): - cover = None - st = time.localtime() - year = str(st.tm_year) - month = "%.2d" % st.tm_mon - day = "%.2d" % st.tm_mday + cover = None + st = time.localtime() + year = str(st.tm_year) + month = "%.2d" % st.tm_mon + day = "%.2d" % st.tm_mday #[url]http://oldorigin-www.heraldo.es/20101211/primeras/portada_aragon.pdf[/url] - cover='http://oldorigin-www.heraldo.es/'+ year + month + day +'/primeras/portada_aragon.pdf' - br = BasicNewsRecipe.get_browser() - try: - br.open(cover) - except: - self.log("\nPortada no disponible") - cover ='http://www.heraldo.es/MODULOS/global/publico/interfaces/img/logo-Heraldo.png' - return cover - + cover='http://oldorigin-www.heraldo.es/'+ year + month + day +'/primeras/portada_aragon.pdf' + br = BasicNewsRecipe.get_browser() + try: + br.open(cover) + except: + self.log("\nPortada no disponible") + cover ='http://www.heraldo.es/MODULOS/global/publico/interfaces/img/logo-Heraldo.png' + return cover extra_css = ''' - h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:xx-large;} - ''' + .con strong{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:16px;} + .con h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;} + .con span{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:12px;} + .ent {font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;} + img{margin-bottom: 0.4em} + ''' + + preprocess_regexps = [ + +# To separate the comments with a blank line + (re.compile(r'