diff --git a/resources/images/news/clarin.png b/resources/images/news/clarin.png
index f08bc3e5ac..2ef634678e 100644
Binary files a/resources/images/news/clarin.png and b/resources/images/news/clarin.png differ
diff --git a/resources/recipes/clarin.recipe b/resources/recipes/clarin.recipe
index 3a96bca162..7bbb663d1d 100644
--- a/resources/recipes/clarin.recipe
+++ b/resources/recipes/clarin.recipe
@@ -5,7 +5,6 @@ __copyright__ = '2008-2010, Darko Miletic '
 clarin.com
 '''
 
-from calibre import strftime
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class Clarin(BasicNewsRecipe):
@@ -18,11 +17,12 @@ class Clarin(BasicNewsRecipe):
     max_articles_per_feed = 100
     use_embedded_content = False
     no_stylesheets = True
-    cover_url = strftime('http://www.clarin.com/diario/%Y/%m/%d/portada.jpg')
-    encoding = 'cp1252'
-    language = 'es'
-    masthead_url = 'http://www.clarin.com/shared/v10/img/Hd/lg_Clarin.gif'
-    extra_css = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,"Times New Roman",Times,serif; font-size: xx-large} .Volan,.Pie,.Autor{ font-size: x-small} .Copete,.Hora{font-size: large} '
+    encoding = 'utf8'
+    language = 'es_AR'
+    publication_type = 'newspaper'
+    INDEX = 'http://www.clarin.com'
+    masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg'
+    extra_css = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,serif; font-size: xx-large} .hora{font-weight:bold} .hd p{font-size: small} .nombre-autor{color: #0F325A} '
 
     conversion_options = {
         'comment' : description
@@ -31,27 +31,34 @@ class Clarin(BasicNewsRecipe):
         , 'language' : language
     }
 
-    remove_tags = [
-        dict(name='a' , attrs={'class':'Imp' })
-        ,dict(name='div' , attrs={'class':'Perma' })
-        ,dict(name='h1' , text='Imprimir' )
-    ]
+    keep_only_tags = [dict(attrs={'class':['hd','mt']})]
 
     feeds = [
-        (u'Ultimo Momento', u'http://www.clarin.com/diario/hoy/um/sumariorss.xml')
-        ,(u'El Pais' , u'http://www.clarin.com/diario/hoy/elpais.xml' )
-        ,(u'Opinion' , u'http://www.clarin.com/diario/hoy/opinion.xml' )
-        ,(u'El Mundo' , u'http://www.clarin.com/diario/hoy/elmundo.xml' )
-        ,(u'Sociedad' , u'http://www.clarin.com/diario/hoy/sociedad.xml' )
-        ,(u'La Ciudad' , u'http://www.clarin.com/diario/hoy/laciudad.xml' )
-        ,(u'Policiales' , u'http://www.clarin.com/diario/hoy/policiales.xml' )
-        ,(u'Deportes' , u'http://www.clarin.com/diario/hoy/deportes.xml' )
+        (u'Pagina principal', u'http://www.clarin.com/rss/'             )
+        ,(u'Politica'        , u'http://www.clarin.com/rss/politica/'    )
+        ,(u'Deportes'        , u'http://www.clarin.com/rss/deportes/'    )
+        ,(u'Economia'        , u'http://www.clarin.com/rss/economia/'    )
+        ,(u'Mundo'           , u'http://www.clarin.com/rss/mundo/'       )
+        ,(u'Espectaculos'    , u'http://www.clarin.com/rss/espectaculos/')
+        ,(u'Sociedad'        , u'http://www.clarin.com/rss/sociedad/'    )
+        ,(u'Ciudades'        , u'http://www.clarin.com/rss/ciudades/'    )
+        ,(u'Policiales'      , u'http://www.clarin.com/rss/policiales/'  )
+        ,(u'Internet'        , u'http://www.clarin.com/rss/internet/'    )
     ]
 
     def print_version(self, url):
-        rest = url.partition('-0')[-1]
-        lmain = rest.partition('.')[0]
-        lurl = u'http://www.servicios.clarin.com/notas/jsp/clarin/v9/notas/imprimir.jsp?pagid=' + lmain
-        return lurl
+        return url + '?print=1'
 
+    def get_cover_url(self):
+        # Build the cover URL from the image inside the '/edicion-impresa/'
+        # link on the INDEX page; returns None when that markup is not found.
+        cover_url = None
+        soup = self.index_to_soup(self.INDEX)
+        cover_item = soup.find('div',attrs={'class':'bb-md bb-md-edicion_papel'})
+        if cover_item:
+            ap = cover_item.find('a',attrs={'href':'/edicion-impresa/'})
+            # The link may carry no <img>; skip it instead of indexing None.
+            if ap and ap.img:
+                cover_url = self.INDEX + ap.img['src']
+        return cover_url
 