From f4703f602bbbf669b8b5de5ad1d69138163dc286 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 30 Dec 2016 08:36:52 +0530 Subject: [PATCH] Update Clarin. Fixes #1653113 [Updated recipe for Clarin](https://bugs.launchpad.net/calibre/+bug/1653113) --- recipes/clarin.recipe | 36 +++++++++++++++++++++++------------- recipes/icons/clarin.png | Bin 600 -> 684 bytes 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/recipes/clarin.recipe b/recipes/clarin.recipe index a5b57875e0..616cb7c8cd 100644 --- a/recipes/clarin.recipe +++ b/recipes/clarin.recipe @@ -1,6 +1,10 @@ +#!/usr/bin/env python2 +# -*- mode: python -*- +# -*- coding: utf-8 -*- + from __future__ import unicode_literals __license__ = 'GPL v3' -__copyright__ = '2008-2015, Darko Miletic ' +__copyright__ = '2008-2016, Darko Miletic ' ''' clarin.com ''' @@ -27,9 +31,8 @@ class Clarin(BasicNewsRecipe): needs_subscription = 'optional' INDEX = 'http://www.clarin.com' LOGIN = 'https://app-pase.clarin.com/pase-registracion/app/pase/ingresarNavegable?execution=e1s1' - masthead_url = 'http://www.clarin.com/static/CLAClarinV3/images/logo.png' - cover_url = strftime( - 'http://tapas.clarin.com/tapa/%Y/%m/%d/%Y%m%d_thumb.jpg') + masthead_url = 'http://www.clarin.com/images/logo_clarin.svg' + cover_url = strftime('http://tapas.clarin.com/tapa/%Y/%m/%d/%Y%m%d_thumb.jpg') extra_css = """ body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,serif; font-size: xx-large} @@ -41,18 +44,24 @@ class Clarin(BasicNewsRecipe): 'comment': description, 'tags': category, 'publisher': publisher, 'language': language } - remove_tags_before = dict(attrs={'class': 'int-nota-title'}) + keep_only_tags = [ + dict(name='p' , attrs={'class' : 'volanta'}), + dict(name='h1' , attrs={'itemprop': 'headline'}), + dict(name='div', attrs={'class' : 'bajada'}), + dict(name='div', attrs={'class' : 'body-nota'}) + ] remove_tags = [ dict(name=['meta', 'base', 'link', 'iframe', 'embed', 'object']), - dict(attrs={'class': ['tags-bar', 'breadcrumb', 'share-bar']}), - dict(attrs={'id': ['relacionadas']}) + dict(attrs={'class': ['tags-bar', 'breadcrumb', 'share-bar', 'share']}), + dict(name='div', attrs={'class': lambda x: x and 'r-nota' in x.split()}), + dict(attrs={'id': ['relacionadas']}), + dict(name='a', attrs={'class':'content-new'}) ] remove_tags_after = dict(name='div', attrs={'id': 'relacionadas'}) remove_attributes = ['lang'] feeds = [ - - (u'Pagina principal', u'http://www.clarin.com/rss/'), + (u'Lo Ultimo', u'http://www.clarin.com/rss/lo-ultimo/'), (u'Politica', u'http://www.clarin.com/rss/politica/'), (u'Deportes', u'http://www.clarin.com/rss/deportes/'), (u'Mundo', u'http://www.clarin.com/rss/mundo/'), @@ -67,10 +76,11 @@ class Clarin(BasicNewsRecipe): br = BasicNewsRecipe.get_browser(self) br.open(self.INDEX) if self.username is not None and self.password is not None: - data = urllib.urlencode({'ingresar_ingresar_paseForm': 'ingresar_ingresar_paseForm', 'ingresar_ingresar_email_paseInputComponent': self.username, 'ingresar_ingresar_palabraClave_paseInputComponent': self.password, 'ingresar_ingresar_ingresar_paseButton': 'Ingresar', 'javax.faces.ViewState': 'e1s1' # noqa + data = urllib.urlencode({'ingresar_ingresar_paseForm': 'ingresar_ingresar_paseForm', + 'ingresar_ingresar_email_paseInputComponent': self.username, + 'ingresar_ingresar_palabraClave_paseInputComponent': self.password, + 'ingresar_ingresar_ingresar_paseButton': 'Ingresar', + 'javax.faces.ViewState': 'e1s1' # noqa }) br.open(self.LOGIN, data) return br - - def get_article_url(self, article): - return article.get('guid', None) diff --git a/recipes/icons/clarin.png b/recipes/icons/clarin.png index 258d2e16cb5b0ab032a90fbc3110f84eae34f091..98767b97f2d4c64d84869b53cfb6ed40ccd01bb2 100644 GIT binary patch delta 661 zcmV;G0&4x(1gr&+BYyw{b3#c}2nYxWd{8i`T3U*g_TFX5K_KlrtWg~4QFo>`>q!-x zHA6(6t$+C|&?hQNGC7gzBn6VBM8CA7RqE)O-{|;RB|#ps%w((b(!{qd44ZUFlJs?0 znw-wDlTgwz%~CW!o!Eh;6Du`J>dcbg4=EL0p(Kuc*2}ZlQAyN1=s|TBYy%PNklE4^L9ldBK5aTjqv&bWqLn}kf}jP_ z%K#Py6$B8_CkT3CK@bq5A|M#OFZOnu*-SluTaAg?h^$%tQKg7f&((dDBa|udL)3F! z=Xc-Sj#{mjk?2mTo2IT+Dms1gw2lu9>CMGu4cPx@9QAs=EPvs@0RTQ-ze%!Z0M#>y z+O?nZ#z#!RVl9F02IuD!8H|(Z-&)vRD!sX@V z@oC=7Jw)en{KTXp&s3{bDl(d*xD?T)gh3q|Ai(dh-^iZ7OhRfK@@8Xf59;s~<$NB% z0_|vrF*vLfeSbbSX74p5h35;rod}cM+K2K4P`*)8y!%T&`}_8>|mJO za)kW-2dwVc*MOBj)L3Nl#vN*a98L#;Y+41!P=W2u&5KZeN4Y#iPMqR}qO!;j@7C<2=`64SJGLy=?=>%9bD_pl+q#uU{RR(z;mGfEJ)zGJgPM=qiZ<3Jy*H O0000