#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008, Darko Miletic '
'''
criticadigital.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


class CriticaDigital(BasicNewsRecipe):
    """Calibre news recipe for 'Critica de la Argentina' (criticadigital.com).

    The class only declares configuration attributes consumed by
    ``BasicNewsRecipe`` plus a ``get_cover_url`` override; see the calibre
    recipe API documentation for the exact semantics of each option.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Critica de la Argentina'
    __author__ = 'Darko Miletic and Sujata Raman'
    description = 'Noticias de Argentina'
    language = 'es_AR'

    # --- Download / parsing options -------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'cp1252'

    # Stylesheet text handed to the recipe framework. The value is assembled
    # from adjacent literals so each rule sits on its own source line while
    # the resulting string stays exactly the same (single-space separated,
    # with one leading and one trailing space).
    extra_css = (
        ' h1{font-family:"Trebuchet MS";}'
        ' h3{color:#9A0000; font-family:Tahoma; font-size:x-small;}'
        ' h2{color:#504E53; font-family:Arial,Helvetica,sans-serif ;font-size:small;}'
        ' #epigrafe{font-family:Arial,Helvetica,sans-serif ;color:#666666 ; font-size:x-small;}'
        ' p {font-family:Arial,Helvetica,sans-serif;}'
        ' #fecha{color:#858585; font-family:Tahoma; font-size:x-small;}'
        ' #autor{color:#858585; font-family:Tahoma; font-size:x-small;}'
        ' #hora{color:#F00000;font-family:Tahoma; font-size:x-small;}'
        ' '
    )

    # Matchers for the page elements that make up an article: the title
    # block, the photo block, the author box and the article text.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['bloqueTitulosNoticia', 'cfotonota']}),
        dict(name='div', attrs={'id': 'boxautor'}),
        dict(name='p', attrs={'id': 'textoNota'}),
    ]

    # Matchers for elements to strip: side boxes, any div carrying an inline
    # style attribute, the comment section and the pager.
    remove_tags = [
        dict(name='div', attrs={'class': 'box300'}),
        dict(name='div', style=True),
        dict(name='div', attrs={'class': 'titcomentario'}),
        dict(name='div', attrs={'class': 'comentario'}),
        dict(name='div', attrs={'class': 'paginador'}),
    ]

    # (section title, RSS URL) pairs, one per site channel.
    feeds = [
        (u'Politica', u'http://www.criticadigital.com/herramientas/rss.php?ch=politica'),
        (u'Economia', u'http://www.criticadigital.com/herramientas/rss.php?ch=economia'),
        (u'Deportes', u'http://www.criticadigital.com/herramientas/rss.php?ch=deportes'),
        (u'Espectaculos', u'http://www.criticadigital.com/herramientas/rss.php?ch=espectaculos'),
        (u'Mundo', u'http://www.criticadigital.com/herramientas/rss.php?ch=mundo'),
        (u'Policiales', u'http://www.criticadigital.com/herramientas/rss.php?ch=policiales'),
        (u'Sociedad', u'http://www.criticadigital.com/herramientas/rss.php?ch=sociedad'),
        (u'Salud', u'http://www.criticadigital.com/herramientas/rss.php?ch=salud'),
        (u'Tecnologia', u'http://www.criticadigital.com/herramientas/rss.php?ch=tecnologia'),
        (u'Santa Fe', u'http://www.criticadigital.com/herramientas/rss.php?ch=santa_fe'),
    ]

    def get_cover_url(self):
        """Return the URL of the print-edition cover image, or ``None``.

        Loads the print-edition index page, looks up the first
        ``<div class="tapa">`` and appends the ``src`` of the image inside it
        to the index URL.

        Returns:
            The cover URL as a string, or ``None`` when the div, its ``<img>``
            child, or a non-empty ``src`` attribute cannot be found.
        """
        index = 'http://www.criticadigital.com/impresa/'
        soup = self.index_to_soup(index)

        link_item = soup.find('div', attrs={'class': 'tapa'})
        if not link_item:
            return None

        # The cover div may be present without an image (or the image without
        # a src); treat that as "no cover" instead of raising while indexing.
        img = link_item.img
        if img is None:
            return None
        src = img.get('src')
        if not src:
            return None

        return index + src