From 1cf5cbd00ffdb8452d092a1ef44fc19482a663ec Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 17 Jun 2015 03:06:16 +0530 Subject: [PATCH] Update El Mercurio Chile Fixes #1465795 [Updated recipe for El Mercurio](https://bugs.launchpad.net/calibre/+bug/1465795) --- recipes/el_mercurio_chile.recipe | 64 +++++++++++++++++++--------- recipes/icons/el_mercurio_chile.png | Bin 2370 -> 669 bytes 2 files changed, 44 insertions(+), 20 deletions(-) diff --git a/recipes/el_mercurio_chile.recipe b/recipes/el_mercurio_chile.recipe index df4d027af3..310ec9db35 100644 --- a/recipes/el_mercurio_chile.recipe +++ b/recipes/el_mercurio_chile.recipe @@ -1,23 +1,26 @@ +#!/usr/bin/env python2 +# -*- coding: latin-1 mode: python -*- + __license__ = 'GPL v3' -__copyright__ = '2009-2010, Darko Miletic ' +__copyright__ = '2009-2015, Darko Miletic ' +__docformat__ = 'restructuredtext es' ''' -emol.com +www.emol.com ''' from calibre.web.feeds.news import BasicNewsRecipe class ElMercurio(BasicNewsRecipe): - title = 'El Mercurio online' + title = 'Emol.com - El sitio de noticias online de Chile' __author__ = 'Darko Miletic' description = 'El sitio de noticias online de Chile' - publisher = 'El Mercurio' + publisher = 'El Mercurio S.A.P.' category = 'news, politics, Chile' oldest_article = 2 max_articles_per_feed = 100 no_stylesheets = True - use_embedded_content = False - encoding = 'cp1252' - masthead_url = 'http://www.emol.com/especiales/logo_emol/logo_emol.gif' + encoding = 'utf8' + masthead_url = 'http://static.emol.cl/emol50/img/logo_emol.gif' remove_javascript = True use_embedded_content = False language = 'es_CL' @@ -30,21 +33,42 @@ class ElMercurio(BasicNewsRecipe): , 'language' : language } - keep_only_tags = [dict(name='div', attrs={'id':['cont_iz_titulobajada','cont_iz_creditos_1_a','cont_iz_cuerpo']})] + keep_only_tags = [ + dict(name='div', attrs={'class':['cont_iz_titulobajada','info-notaemol-por','info-notaemol-porfecha']}) + ,dict(name='div', attrs={'id':'texto_noticia'}) + ] remove_tags = [dict(name='div', attrs={'id':'cont_iz_cuerpo_relacionados'})] - remove_attributes = ['height','width'] feeds = [ - (u'Noticias de ultima hora', u'http://rss.emol.com/rss.asp?canal=0') - ,(u'Nacional', u'http://rss.emol.com/rss.asp?canal=1') - ,(u'Mundo', u'http://rss.emol.com/rss.asp?canal=2') - ,(u'Deportes', u'http://rss.emol.com/rss.asp?canal=4') - ,(u'Magazine', u'http://rss.emol.com/rss.asp?canal=6') - ,(u'Tecnologia', u'http://rss.emol.com/rss.asp?canal=5') + (u'Nacional' , u'http://www.emol.com/noticias/nacional/todas.aspx' ) + ,(u'Mundo' , u'http://www.emol.com/noticias/internacional/todas.aspx') + ,(u'Deportes' , u'http://www.emol.com/noticias/deportes/todas.aspx' ) + ,(u'Espectaculos', u'http://www.emol.com/noticias/cultura/todas.aspx' ) + ,(u'Tecnologia' , u'http://www.emol.com/noticias/economia/todas.aspx' ) ] - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - return soup - + def parse_index(self): + totalfeeds = [] + lfeeds = self.get_feeds() + for feedobj in lfeeds: + feedtitle, feedurl = feedobj + self.report_progress(0, _('Fetching feed')+' %s...'%(feedtitle if feedtitle else feedurl)) + articles = [] + soup = self.index_to_soup(feedurl) + arts = soup.find('div', attrs={'id':'caja_listado_noticia_todas'}) + if arts: + for item in arts.findAll('div', attrs={'class':'listado'}): + atag = item.find('a') + ptag = item.find('span') + url = atag['href'] + title = self.tag_to_string(atag) + description = self.tag_to_string(ptag) + #date,sep,rest = self.tag_to_string(ptag).partition('|') + articles.append({ + 'title' :title + ,'date' :'' + ,'url' :url + ,'description':description + }) + totalfeeds.append((feedtitle, articles)) + return totalfeeds diff --git a/recipes/icons/el_mercurio_chile.png b/recipes/icons/el_mercurio_chile.png index 1cba7a6aecd3a98ae8b11a605e1906d61c9ba299..3ca190ae4c9a23318e7c4a7797edf11199904040 100644 GIT binary patch delta 654 zcmV;90&)Gq5}gGhiBL{Q4GJ0x0000DNk~Le0000G0000G2nGNE03Y-JVUZyme+v@; z01FcV0GgZ_00007bV*G`2j2z|4;CUaK^1EN00JsWL_t(I%WadpPgFq^#(!sK_Q6Yt zfTAJq83kFaZSrB&T z?!9-$;;zds%Sk3@ax&*T-}%lNe|h@yy=PC8iuX15Ml+=#stXJp?x62rJ53o=joEU< z($7uimkaE!W`ZyTAf+`|$BuJ)pnK0s=T97BHkaqs^c>|dZe%bqHc*!w#g5o{1W2be z07Ly<*mBI&+u0oeYs(-~m;7dCfp?!4aTUJ(JssShJVPd>8S3w(6ogfse~b;v0BEJE z0Mnlr`MtgclEl_Fid-2Tq^Gl$*m>-K-9#|OQUD@&zavjeQ!1e?0ti|OqNF;Mgn@v7 zDcJyoI)HmOMjOYa6E?;ua;qD(w`R$v4d&OM0{QiABIg<&@3z=^p1=OU$GKISvuXD4 zYsP&2wnz}gc(=pg&f_V*e=p@hbfwj#ox+x7eq|GYqAgRjA;y*xv7~0%X1**i_hSu| zM#O+ph^RzVQGg`-DWF_r@sX|5;KHdR_&5X?;avnS##avBRn|MvP1=TUd6eeGllxp7 zKL>zwo`vN+&ei+_5mxel=^7cY7P>rn7vsYsD9yu1&p3AU04E1~WKl{O>}_1>?v56g zYXjb6T(}Kl$mMd3-+aLB>l0iWIZInx3m}M6fC3Tng(Aas01nCaUG$<1q73L1Vls$M oBRYeaTAxN#3baYQC`4)g0d9BOVOP+5H~;_u07*qoM6N<$f)uPG4FCWD delta 2369 zcmV-H3BLB71;P>`iBL{Q4GJ0x0000DNk~Le0000m0000m2nGNE09OL}hmj#1e*j1T z002k;M#*bF00009c5p#w0000m0000m0M71=R{#JAYe_^wRA}DqnJ_rOYNuu>WvFgQgfV~(Gv&Z#WIr-m0f{UK6Mf1iHIk3ZjW`R1B;@7{q`N5{uelQa}dG;(@+&c&-Sstuqv zMsaTe*Wh&xK;8b%`eD|4D`czBUe`#LAp>}OU2=JOg{S#ZBp#fgp;%_ma{m1CoY!YV zR5OBh!Px+Juc3yT4*dT8`hx8mBEn==ae4U$OEavR9mM$L7|Z|he^1|Wep+x6gp+5& zhYueb@HOy1$mB7DFCO#wamEKA%O3B14YDs7NCqEZ@!tPEG?_cE`ECj-Js&itl= zEc@Q=r@!sPm>mQ!N?wdP9p%IzeErq$_~q^nI`A1t230c3WF&LUeCK{5APA~L9+VIi zcPHJOFMv4|dEoTye~f?p-O$K#pA!4e0TDtzVl)~NqwxG>z+Df#WN^E1CO3&H8Rp)o z;=YOcz5`Xx6o^4N9psT#_Z5LnynECDW@VyG#_MshMWRo5G#pZu6?J{PQ-$3Me~1#|kQf6o2v47W$uGa$ z;dM)(u9?g#Dl^O)H)sO7Q(=ek0quV&jq6=a=3JZ(0k9o}RfS?WVrmsIdsqPt#d4r0 z#F!%@#30QfRl~xafe5o7ZYf*0>HOZk0gy)}54!arb7xj2q?dz4U^Y+8@5l?$13KF;~;%Ywz?p7#iZ zLB{mvCobNuttc(&X<1RkVY|k+yiWu|ELf!ZVFfe{mm-R2M3APM6fmpty2gP;?NrH0 zJq5alosV;FesIdlcDllN7%5`OWL}{n?HK|o^+LCOf4lmoeG^ovG@@~|v}*cjCjkUR z71gjhZE3ARXa6yTX^N; z4-L7e!ivNO62R$;Dfg9PXBw52FFRn?E^NWye}B;C(qmego{d|d92b$DUe|#*FtQ!H zZY(x;!m99p0Fd>4`r)A}yc!P~<&jxw50GW!oY+Iaac^XceG=w7tBzBr`@uWLBnaBs+SWCd>KG zKO4SL7r7g5X)A^AAYhm!%p&s&M(T(Nf8N#vB270{mDYUUv&ieKDHmr$V$e-H?U2M- z*H8c<2)Y6fm4TOOT>+Ri#yzBfRkKGLhfB8-vxsS6UKvHy7Hre6o(?0^X|<(7GUxTx z^tb?U7#4xcS7Rtk;>8tIt#T@avvVvCNhvi&qg5;{ZQWArJO|toZV5BRM3`4be+){{ zrp_4`k?|-pnU;v^Mpnc@7{ZFy)+J& z1C2b3BSOpxZHpvb7%v&$RpxLKDkEPV$cQV!!g$t%xBl6YEHik zJBk1_EvCi-jYC3+J4tjzg32nRJah_o2V*iPr6yvucM)Lblx1Q*n_$(fe<^SF5IFn~ z4iPNPdypc~q&t=M$z~2#ZPzJ>1z0C}2db)CwXJ4`rSd`W4-&9y`bgt&X}#M3_eg*y z=4b=9tG|y&-|n?;j;!nSJ_0-(d%Ye#gXebcP38Bdns_T2tV``i30PEV+E|8mC zpSo-Hqre|cpdm3_0*FRbe{&?n_V%Ldq1Vm)Ap%|T*$3W~2^E$~+^VC&e-wcgkuY2$ zf`*7_j%Yw6tP8oWqw5h_cXGbrHXZN=e1*FK{^0)vK8ZkI4G>ggLqbE7&qdqTa=Uu# zT1a>6fln=Pj<)(I6WAc)-U1ggc}6A!M3lC!SZmNh$|cZbuje*ylR5a>q8PnT;z zyNcuW=+qHas@dD$S|BZMigtln{y#N3JC3HntbYpU5VRU6JWMOn=05UKz zIV~_UEif`vFfuwZH##smD=;uRFfa+&`>g-~03~!qSafY~WNBu3Eo5PIWdJfTFgYzS nGA%GMR53U@F)%ta7ceU@Fgh?WAv))B00000NkvXXu0mjfbuC6(