From 648b73541ab46676e6e49a9ec4ee6288deb776c1 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 25 Sep 2015 23:33:30 +0530 Subject: [PATCH] Update Infobae.com Fixes #1499818 [Updated recipe for Infobae](https://bugs.launchpad.net/calibre/+bug/1499818) --- recipes/icons/infobae.png | Bin 193 -> 270 bytes recipes/infobae.recipe | 52 +++++++++++--------------------------- 2 files changed, 15 insertions(+), 37 deletions(-) diff --git a/recipes/icons/infobae.png b/recipes/icons/infobae.png index 959efa4f515d053ee456c8450ff4a623155010a0..365789156e08412f050e646150a00a4d61cd0b90 100644 GIT binary patch delta 253 zcmX@e*vB+MvYw5BfkFQB|3o0gmgMd3!tfsi7wla=87RV8;1OBOz`!jG!i)^F=12eq z*-JcqUD@w*N(!pT=k1Kv0t!i%xJHyX=jZ08=9Mrw7o{eaq^2m8XO?6rxO@5rgg5eu z0~M8dx;TbdoKF68_t=N|jZ6nxyqf~gl{Gf1zGh_hzQ)YvX32huH-YcwfddEh&jT@k z;^Zub!~g%^xOqhCLwT;k;s5>o694LJ4uCj*%o3WsE@E>M1!@}}{$I@ef#L7&D~%@1 q5>MYq0`bXyx30?vS1>xCWMJ5NUf#5;Pa+!V1O`u6KbLh*2~7ao`d$D4 delta 176 zcmV;h08jsp0>J^08Gix*005AYXf^-<0EkIMK~#9!wUU7jMNkYw`|16(`)BXf#z|a6 zNDNV(R=3daVZ9ZMF`!^hc>!$G4zMldgh$XeQgAo=ReQPmCRJ-E2+)eH diff --git a/recipes/infobae.recipe b/recipes/infobae.recipe index b577988347..426263846b 100644 --- a/recipes/infobae.recipe +++ b/recipes/infobae.recipe @@ -1,5 +1,5 @@ __license__ = 'GPL v3' -__copyright__ = '2008-2011, Darko Miletic ' +__copyright__ = '2008-2015, Darko Miletic ' ''' infobae.com ''' @@ -16,9 +16,9 @@ class Infobae(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False + auto_cleanup = False language = 'es_AR' encoding = 'utf8' - masthead_url = 'http://www.infobae.com/media/img/static/logo-infobae.gif' remove_empty_feeds = True extra_css = ''' body{font-family: Arial,Helvetica,sans-serif} @@ -32,43 +32,21 @@ class Infobae(BasicNewsRecipe): , 'publisher' : publisher , 'language' : language } - - keep_only_tags = [dict(attrs={'class':['titularnota','nota','post-title','post-entry','entry-title','entry-info','entry-content']})] - remove_tags_after = dict(attrs={'class':['interior-noticia','nota-desc','tags']}) + + remove_tags_before = dict(name='h1', attrs={'class':'entry-title'}) + remove_tags_after = dict(name='div', attrs={'class':'narrowcontent'}) remove_tags = [ dict(name=['base','meta','link','iframe','object','embed','ins']) - ,dict(attrs={'class':['barranota','tags']}) + ,dict(attrs={'class':['social-hori clearfix','narrowcontent','embed_cont type_freetext']}) + ,dict(attrs={'id':'captcha-modal'}) ] - + feeds = [ - (u'Saludable' , u'http://www.infobae.com/rss/saludable.xml') - ,(u'Economia' , u'http://www.infobae.com/rss/economia.xml' ) - ,(u'En Numeros', u'http://www.infobae.com/rss/rating.xml' ) - ,(u'Finanzas' , u'http://www.infobae.com/rss/finanzas.xml' ) - ,(u'Mundo' , u'http://www.infobae.com/rss/mundo.xml' ) - ,(u'Sociedad' , u'http://www.infobae.com/rss/sociedad.xml' ) - ,(u'Politica' , u'http://www.infobae.com/rss/politica.xml' ) - ,(u'Deportes' , u'http://www.infobae.com/rss/deportes.xml' ) + (u'Ahora' , u'http://cdn02.ib.infobae.com/adjuntos/162/rss/Infobae.xml') + ,(u'Economia' , u'http://cdn02.ib.infobae.com/adjuntos/162/rss/economia.xml') + ,(u'Cultura' , u'http://cdn02.ib.infobae.com/adjuntos/162/rss/Cultura.xml') + ,(u'Finanzas' , u'http://cdn02.ib.infobae.com/adjuntos/162/rss/finanzas.xml') + ,(u'Sociedad' , u'http://cdn02.ib.infobae.com/adjuntos/162/rss/sociedad.xml') + ,(u'Politica' , u'http://cdn02.ib.infobae.com/adjuntos/162/rss/politica.xml') + ,(u'Deportes' , u'http://cdn02.ib.infobae.com/adjuntos/162/rss/deportes.xml') ] - - def preprocess_html(self, soup): - for item in soup.findAll(style=True): - del item['style'] - for item in soup.findAll('a'): - limg = item.find('img') - if item.string is not None: - str = item.string - item.replaceWith(str) - else: - if limg: - item.name = 'div' - item.attrs = [] - else: - str = self.tag_to_string(item) - item.replaceWith(str) - for item in soup.findAll('img'): - if not item.has_key('alt'): - item['alt'] = 'image' - return soup - -