#!/usr/bin/env python __license__ = 'GPL v3' __copyright__ = '2008, Darko Miletic ' ''' cronista.com ''' from calibre.web.feeds.news import BasicNewsRecipe class ElCronista(BasicNewsRecipe): title = 'El Cronista' __author__ = 'Darko Miletic' description = 'Noticias de Argentina' oldest_article = 2 language = 'es_AR' max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False encoding = 'cp1252' html2lrf_options = [ '--comment' , description , '--category' , 'news, Argentina' , '--publisher' , title ] keep_only_tags = [ dict(name='table', attrs={'width':'100%' }) ,dict(name='h1' , attrs={'class':'Arialgris16normal'}) ] remove_tags = [dict(name='a', attrs={'class':'Arialazul12'})] feeds = [ (u'Economia' , u'http://www.cronista.com/adjuntos/8/rss/Economia_EI.xml' ) ,(u'Negocios' , u'http://www.cronista.com/adjuntos/8/rss/negocios_EI.xml' ) ,(u'Ultimo momento' , u'http://www.cronista.com/adjuntos/8/rss/ultimo_momento.xml' ) ,(u'Finanzas y Mercados' , u'http://www.cronista.com/adjuntos/8/rss/Finanzas_Mercados_EI.xml' ) ,(u'Financial Times' , u'http://www.cronista.com/adjuntos/8/rss/FT_EI.xml' ) ,(u'Opinion edicion impresa' , u'http://www.cronista.com/adjuntos/8/rss/opinion_edicion_impresa.xml' ) ,(u'Socialmente Responsables', u'http://www.cronista.com/adjuntos/8/rss/Socialmente_Responsables.xml') ,(u'Asuntos Legales' , u'http://www.cronista.com/adjuntos/8/rss/asuntoslegales.xml' ) ,(u'IT Business' , u'http://www.cronista.com/adjuntos/8/rss/itbusiness.xml' ) ,(u'Management y RR.HH.' , u'http://www.cronista.com/adjuntos/8/rss/management.xml' ) ,(u'Inversiones Personales' , u'http://www.cronista.com/adjuntos/8/rss/inversionespersonales.xml' ) ] def print_version(self, url): main, sep, rest = url.partition('.com/notas/') article_id, lsep, rrest = rest.partition('-') return 'http://www.cronista.com/interior/index.php?p=imprimir_nota&idNota=' + article_id def preprocess_html(self, soup): mtag = '' soup.head.insert(0,mtag) soup.head.base.extract() htext = soup.find('h1',attrs={'class':'Arialgris16normal'}) htext.name = 'p' soup.prettify() return soup def get_cover_url(self): cover_url = None index = 'http://www.cronista.com/contenidos/' soup = self.index_to_soup(index + 'ee.html') link_item = soup.find('a',attrs={'href':"javascript:Close()"}) if link_item: cover_url = index + link_item.img['src'] return cover_url