From 67adcb92f3e835ee0e320117d896854c652914ca Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 9 Jun 2011 13:33:46 -0600 Subject: [PATCH] Updated AMbito and Ambito Financiero by DM. Fixes #795158 (Updated recipe for Ambito.com and new recipe for Ambito Fianciero both in spanish) --- recipes/ambito.recipe | 55 +++++++++--------- recipes/ambito_financiero.recipe | 87 ++++++++++++++++++++++++++++ recipes/icons/ambito_financiero.png | Bin 0 -> 508 bytes 3 files changed, 116 insertions(+), 26 deletions(-) create mode 100644 recipes/ambito_financiero.recipe create mode 100644 recipes/icons/ambito_financiero.png diff --git a/recipes/ambito.recipe b/recipes/ambito.recipe index dd92ee19b3..55a532bb9e 100644 --- a/recipes/ambito.recipe +++ b/recipes/ambito.recipe @@ -1,7 +1,5 @@ -#!/usr/bin/env python - __license__ = 'GPL v3' -__copyright__ = '2008-2009, Darko Miletic ' +__copyright__ = '2008-2011, Darko Miletic ' ''' ambito.com ''' @@ -11,51 +9,56 @@ from calibre.web.feeds.news import BasicNewsRecipe class Ambito(BasicNewsRecipe): title = 'Ambito.com' __author__ = 'Darko Miletic' - description = 'Informacion Libre las 24 horas' - publisher = 'Ambito.com' - category = 'news, politics, Argentina' + description = 'Ambito.com con noticias del Diario Ambito Financiero de Buenos Aires' + publisher = 'Editorial Nefir S.A.' + category = 'news, politics, economy, finances, Argentina' oldest_article = 2 - max_articles_per_feed = 100 no_stylesheets = True - encoding = 'iso-8859-1' - cover_url = 'http://www.ambito.com/img/logo_.jpg' - remove_javascript = True + encoding = 'cp1252' + masthead_url = 'http://www.ambito.com/img/logo_.jpg' use_embedded_content = False + language = 'es_AR' + publication_type = 'newsportal' + extra_css = """ + body{font-family: "Trebuchet MS",Verdana,sans-serif} + .volanta{font-size: small} + .t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698} + """ - html2lrf_options = [ - '--comment', description - , '--category', category - , '--publisher', publisher - ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } keep_only_tags = [dict(name='div', attrs={'align':'justify'})] - - remove_tags = [dict(name=['object','link'])] + remove_tags = [dict(name=['object','link','embed','iframe','meta','link','table','img'])] + remove_attributes = ['align'] feeds = [ (u'Principales Noticias', u'http://www.ambito.com/rss/noticiasp.asp' ) ,(u'Economia' , u'http://www.ambito.com/rss/noticias.asp?S=Econom%EDa' ) ,(u'Politica' , u'http://www.ambito.com/rss/noticias.asp?S=Pol%EDtica' ) ,(u'Informacion General' , u'http://www.ambito.com/rss/noticias.asp?S=Informaci%F3n%20General') - ,(u'Agro' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' ) + ,(u'Campo' , u'http://www.ambito.com/rss/noticias.asp?S=Agro' ) ,(u'Internacionales' , u'http://www.ambito.com/rss/noticias.asp?S=Internacionales' ) ,(u'Deportes' , u'http://www.ambito.com/rss/noticias.asp?S=Deportes' ) ,(u'Espectaculos' , u'http://www.ambito.com/rss/noticias.asp?S=Espect%E1culos' ) - ,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnologia' ) - ,(u'Salud' , u'http://www.ambito.com/rss/noticias.asp?S=Salud' ) + ,(u'Tecnologia' , u'http://www.ambito.com/rss/noticias.asp?S=Tecnolog%EDa' ) ,(u'Ambito Nacional' , u'http://www.ambito.com/rss/noticias.asp?S=Ambito%20Nacional' ) ] def print_version(self, url): - return url.replace('http://www.ambito.com/noticia.asp?','http://www.ambito.com/noticias/imprimir.asp?') + return url.replace('/noticia.asp?','/noticias/imprimir.asp?') def preprocess_html(self, soup): - mtag = '' - soup.head.insert(0,mtag) for item in soup.findAll(style=True): del item['style'] + for item in soup.findAll('a'): + str = item.string + if str is None: + str = self.tag_to_string(item) + item.replaceWith(str) return soup - - language = 'es_AR' diff --git a/recipes/ambito_financiero.recipe b/recipes/ambito_financiero.recipe new file mode 100644 index 0000000000..08c056e8ee --- /dev/null +++ b/recipes/ambito_financiero.recipe @@ -0,0 +1,87 @@ +__license__ = 'GPL v3' +__copyright__ = '2011, Darko Miletic ' +''' +ambito.com/diario +''' + +import time +from calibre import strftime +from calibre.web.feeds.news import BasicNewsRecipe + +class Ambito_Financiero(BasicNewsRecipe): + title = 'Ambito Financiero' + __author__ = 'Darko Miletic' + description = 'Informacion Libre las 24 horas' + publisher = 'Editorial Nefir S.A.' + category = 'news, politics, economy, Argentina' + no_stylesheets = True + encoding = 'cp1252' + masthead_url = 'http://www.ambito.com/diario/img/logo_af.gif' + publication_type = 'newspaper' + needs_subscription = 'optional' + use_embedded_content = False + language = 'es_AR' + PREFIX = 'http://www.ambito.com' + INDEX = PREFIX + '/diario/index.asp' + LOGIN = PREFIX + '/diario/login/entrada.asp' + extra_css = """ + body{font-family: "Trebuchet MS",Verdana,sans-serif} + .volanta{font-size: small} + .t2_portada{font-size: xx-large; font-family: Georgia,serif; color: #026698} + """ + + conversion_options = { + 'comment' : description + , 'tags' : category + , 'publisher' : publisher + , 'language' : language + } + + keep_only_tags = [dict(name='div', attrs={'align':'justify'})] + remove_tags = [dict(name=['object','link','embed','iframe','meta','link','table','img'])] + remove_attributes = ['align'] + + def get_browser(self): + br = BasicNewsRecipe.get_browser() + br.open(self.INDEX) + if self.username is not None and self.password is not None: + br.open(self.LOGIN) + br.select_form(name='frmlogin') + br['USER_NAME'] = self.username + br['USER_PASS'] = self.password + br.submit() + return br + + def print_version(self, url): + return url.replace('/diario/noticia.asp?','/noticias/imprimir.asp?') + + def preprocess_html(self, soup): + for item in soup.findAll(style=True): + del item['style'] + for item in soup.findAll('a'): + str = item.string + if str is None: + str = self.tag_to_string(item) + item.replaceWith(str) + return soup + + def parse_index(self): + soup = self.index_to_soup(self.INDEX) + cover_item = soup.find('img',attrs={'class':'fotodespliegue'}) + if cover_item: + self.cover_url = self.PREFIX + cover_item['src'] + articles = [] + checker = [] + for feed_link in soup.findAll('a', attrs={'class':['t0_portada','t2_portada','bajada']}): + url = self.PREFIX + feed_link['href'] + title = self.tag_to_string(feed_link) + date = strftime("%a, %d %b %Y %H:%M:%S +0000",time.gmtime()) + if url not in checker: + checker.append(url) + articles.append({ + 'title' :title + ,'date' :date + ,'url' :url + ,'description':u'' + }) + return [(self.title, articles)] diff --git a/recipes/icons/ambito_financiero.png b/recipes/icons/ambito_financiero.png new file mode 100644 index 0000000000000000000000000000000000000000..e0a6f409cf01248d35328d5a43c3cb78981d2139 GIT binary patch literal 508 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`Y)RhkE)4%caKYZ?lYt_f1s;*b zK-vS0-A-oPfdtD69Mgd`SU*F|v9*U87#LMOT^vI!PA7j^WASS~BmaYE35hR!E~f=d zdXkm^1UtU}(RMz!q`RO}jg3j+maEvhpRfDvd1_a`UvOe`!sR6@C(kCV)ID3< z3A&u(`B5d!5AS_mzk?TKk_K03;)W|ihYY@eG{x8dIV#}((P>scQ&(Eb?H!g85?RTn zAEXy7KhnQQ(sF|Y5FBB>`BA!I&4bh5?F*JKZs$K|pfSIc_2l__$tfy3C3jc`9G*G5 zS#mq$^!N2iAol@HPwFU}==xDeuOX>nl8U)=?i#i`+CZxke|-9@AGSZ4mpSFEfPQRh z#m|52apDhnj#QuDpQIwy#HPR;q0_-#G$FlVo{)X>>Wva{|C1#O{@k(_IeADx^wORU zCFhfQoG&m|)iE$Mt40*-YTYsbhMsDPYeY$MQEFmIs%{F9U@$T;u+%j$)HSpSF|@QY tGO{u@)iyA&GBB9Bb5agWLvDUbW?Cfy4W~GsL;y7~c)I$ztaD0e0sy%tz9|3z literal 0 HcmV?d00001