#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '04 December 2010, desUBIKado' __author__ = 'desUBIKado' __description__ = 'Daily newspaper from Aragon' __version__ = 'v0.04' __date__ = '6, Januery 2011' ''' [url]http://www.heraldo.es/[/url] ''' import time import re from calibre.web.feeds.news import BasicNewsRecipe class heraldo(BasicNewsRecipe): __author__ = 'desUBIKado' description = 'Daily newspaper from Aragon' title = u'Heraldo de Aragon' publisher = 'OJD Nielsen' category = 'News, politics, culture, economy, general interest' language = 'es' timefmt = '[%a, %d %b, %Y]' oldest_article = 2 delay = 1 max_articles_per_feed = 100 use_embedded_content = False remove_javascript = True no_stylesheets = True feeds = [ (u'Portadas', u'http://www.heraldo.es/index.php/mod.portadas/mem.rss') ] keep_only_tags = [dict(name='div', attrs={'id':['dts','com']})] remove_tags = [dict(name='a', attrs={'class':['com flo-r','enl-if','enl-df']}), dict(name='div', attrs={'class':['brb-b-s con marg-btt','cnt-rel con']}), dict(name='form', attrs={'class':'form'}), dict(name='ul', attrs={'id':['cont-tags','pag-1']})] remove_tags_before = dict(name='div' , attrs={'id':'dts'}) remove_tags_after = dict(name='div' , attrs={'id':'com'}) def get_cover_url(self): cover = None st = time.localtime() year = str(st.tm_year) month = "%.2d" % st.tm_mon day = "%.2d" % st.tm_mday #[url]http://oldorigin-www.heraldo.es/20101211/primeras/portada_aragon.pdf[/url] cover='http://oldorigin-www.heraldo.es/'+ year + month + day +'/primeras/portada_aragon.pdf' br = BasicNewsRecipe.get_browser() try: br.open(cover) except: self.log("\nPortada no disponible") cover ='http://www.heraldo.es/MODULOS/global/publico/interfaces/img/logo-Heraldo.png' return cover extra_css = ''' .con strong{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:16px;} .con h2{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:30px;} .con span{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:12px;} .ent {font-family:Arial,Helvetica,sans-serif; font-weight:normal; font-style:italic; font-size:18px;} img{margin-bottom: 0.4em} ''' preprocess_regexps = [ # To separate the comments with a blank line (re.compile(r'