From 55ff9d4a51af8e2948b1aee01e5980169eba353c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Thu, 4 Apr 2019 18:21:14 +0530 Subject: [PATCH] Update Le Temps --- recipes/le_temps.recipe | 247 ++++++++++++++-------------------------- 1 file changed, 83 insertions(+), 164 deletions(-) diff --git a/recipes/le_temps.recipe b/recipes/le_temps.recipe index 29b0e16249..4f180e7f4b 100644 --- a/recipes/le_temps.recipe +++ b/recipes/le_temps.recipe @@ -1,19 +1,17 @@ #!/usr/bin/env python2 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai -from __future__ import with_statement -from __future__ import print_function - -__license__ = 'GPL v3' -__copyright__ = '2009, Kovid Goyal ' -__docformat__ = 'restructuredtext en' - -# ------------------------------- -# Modified by Roland Kessi - February 2014 -# ------------------------------- +# License: GPLv3 Copyright: 2009, Kovid Goyal from calibre.web.feeds.news import BasicNewsRecipe +def classes(classes): + q = frozenset(classes.split(' ')) + return dict( + attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)} + ) + + class LeTemps(BasicNewsRecipe): title = u'Le Temps' oldest_article = 7 @@ -22,179 +20,100 @@ class LeTemps(BasicNewsRecipe): description = 'French news. Needs a subscription from https://www.letemps.ch' no_stylesheets = True remove_javascript = True - recursions = 1 encoding = 'UTF-8' - match_regexps = [r'https://www.letemps.ch/Page/Uuid/[-0-9a-f]+\|[1-9]'] language = 'fr' needs_subscription = True - simultaneous_downloads = 5 use_embedded_content = False remove_empty_feeds = True def get_browser(self): br = BasicNewsRecipe.get_browser(self) - br.open('https://www.letemps.ch/login') + br.open('https://www.letemps.ch/user/login') br.select_form(action='/user/login') br['name'] = self.username br['pass'] = self.password raw = br.submit().read() - if '>Login' in raw: - raise ValueError('Failed to login to letemps.ch. Check ' - 'your username and password') + if 'href="/subscribe"' in raw: + raise ValueError( + 'Failed to login to letemps.ch. Check ' + 'your username and password' + ) return br - def get_article_url(self, article): - ''' - Override in a subclass to customize extraction of the :term:`URL` that points - to the content for each article. Return the - article URL. It is called with `article`, an object representing a parsed article - from a feed. See `feedparser `_. - By default it looks for the original link (for feeds syndicated via a - service like feedburner or pheedo) and if found, - returns that or else returns - `article.link `_. - ''' - # ======================================================================= - # Avoid going through http://rss.feedsportal.com/... - # ======================================================================= - for key in article.keys(): - if key.endswith('_origlink'): - url = article[key] - if url and url.startswith('https://'): - print('Url is :', url) - return url - ans = article.get('link', None) - if not ans and getattr(article, 'links', None): - for item in article.links: - if item.get('rel', 'alternate') == 'alternate': - ans = item['href'] - break - pos = ans.find('letemps0Bch') - if pos > -1: - ans = 'http://www.' + ans[pos:] - ans = ans.replace('0A', '0') - ans = ans.replace('0B', '.') - ans = ans.replace('0C', '/') - ans = ans.replace('0E', '-') - return ans - keep_only_tags = [ - dict(name='div', attrs={'id': 'content'}), + classes('picture-cover article-content'), ] + remove_tags = [ - dict(name='div', attrs={'id': 'html5_gallery'}), - dict(name='ul', attrs={'class': ['tabs', 'social-buttons cf']}), - dict(name='img', attrs={'class': ['bigImg']}), - dict(name='div', attrs={'class': [ - 'box function', 'contentInserts', 'box banner', 'related-stories', - 'box additional', 'galleryOverview', 'position', 'rightAd', 'bottomAd', 'video', - ]}), + dict(name=['button', 'source']), + classes('share related-news'), ] - extra_css = ''' - h1{font-family:"Georgia","Times New Roman",Times,serif;font-size:large;} - .headline{font-family:"Georgia","Times New Roman",Times,serif;font-size:large;color:#990000;} - .summary_gal{color:#777777;font-family:"Georgia","Times New Roman",Times,serif;font-size:x-small;} - #capt{color:#1B1B1B;font-family:"Georgia","Times New Roman",Times,serif;font-size:x-small;} - #content{font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;} - .box.article.important{font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;} - #h2 {font-size: 24px; line-height: 25px; margin-bottom: 14px; text-transform:uppercase;} - .lead {font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;font-weight: bold; margin: 10px 0;font-size:small;} - p {margin: 0 0 10px 0;} - h3{font-size:small;font-weight:bold;} - .description{font-size:x-small;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;color:white; } - a {color:#1B1B1B; font-size:small;} - .linkbox{font-size:x-small;color:#1B1B1B;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;} - - h2{font-size:small;font-weight:bold;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;} - p.clear{clear:both;} - .heading{font-size:x-small;} - .heading strong{color:#940026;} - .box dd { clear:both; } - .box dl { position:relative; } - dl.caption {float:left;overflow:hidden;position:relative;margin: 0 10px 12px -40px;} - .caption dd p, - .caption dt img { margin-right: 0;margin-bottom: 0;} - .caption dt img {float: left;} - .caption dd {width: 100%;bottom: -1px;position: absolute;} - .caption dd .description {z-index: 2;margin-left: 0px;padding: 3px 4px;position: relative;} - ''' - feeds = [ (u'Actualité', u'https://letemps.ch/rss/site/'), - (u'Actualité - Monde', - u'https://letemps.ch/rss/site/actualite/monde'), - (u'Actualité - Suisse & régions', - u'https://letemps.ch/rss/site/actualite/suisse_regions'), - (u'Actualité - Sport', - u'https://letemps.ch/rss/site/actualite/sports'), - (u'Actualité - Sciences & Environnement', - u'https://letemps.ch/rss/site/actualite/sciences_environnement'), - (u'Actualité - Multimédia', - u'https://letemps.ch/rss/site/actualite/multimedia'), - (u'Actualité - Société', - u'https://letemps.ch/rss/site/actualite/societe'), - (u'Actualité - Société | Quoi de neuf', - u'https://letemps.ch/rss/site/actualite/societe/quoi_de_neuf'), - (u'Economie & Finance', - u'https://letemps.ch/rss/site/economie_finance'), - (u'Economie & Finance - Finance', - u'https://letemps.ch/rss/site/economie_finance/finance'), - (u'Economie & Finance - Fonds de placement', - u'https://letemps.ch/rss/site/economie_finance/fonds_placement'), - (u'Economie & Finance - Carrières', - u'https://letemps.ch/rss/site/economie_finance/carrieres'), - (u'Culture', u'http://letemps.ch/rss/site/culture'), - (u'Culture - Cinémas', - u'https://letemps.ch/rss/site/culture/cinema'), - (u'Culture - Musiques', - u'https://letemps.ch/rss/site/culture/musiques'), - (u'Culture - Scènes', - u'https://letemps.ch/rss/site/culture/scenes'), - (u'Culture - Arts plastiques', - u'https://letemps.ch/rss/site/culture/arts_plastiques'), - (u'Culture - Livres', - u'https://letemps.ch/rss/site/culture/livres'), - (u'Lifestyle - Luxe', - u'https://letemps.ch/rss/site/lifestyle/luxe'), - (u'Lifestyle - Mode', - u'https://letemps.ch/rss/site/lifestyle/mode'), - (u'Lifestyle - Horlogerie & Joaillerie', - u'https://letemps.ch/rss/site/lifestyle/horlogerie_joaillerie'), - (u'Lifestyle - Design', - u'https://letemps.ch/rss/site/lifestyle/design'), - (u'Lifestyle - Voyages', - u'https://letemps.ch/rss/site/lifestyle/voyages'), - (u'Lifestyle - Gastronomie', - u'https://letemps.ch/rss/site/lifestyle/gastronomie'), - (u'Lifestyle - Architecture & Immobilier', - u'https://letemps.ch/rss/site/lifestyle/architecture_immobilier'), - (u'Lifestyle - Automobile', - u'https://letemps.ch/rss/site/lifestyle/automobile'), - (u'Opinions', u'http://letemps.ch/rss/site/opinions'), - (u'Opinions - Editoriaux', - u'https://letemps.ch/rss/site/opinions/editoriaux'), - (u'Opinions - Invités', - u'https://letemps.ch/rss/site/opinions/invites'), - (u'Opinions - Chroniques', - u'https://letemps.ch/rss/site/opinions/chroniques'), - (u'Opinions - Chappatte', - u'https://letemps.ch/rss/site/opinions/chappatte') + (u'Actualité - Monde', u'https://letemps.ch/rss/site/actualite/monde'), + ( + u'Actualité - Suisse & régions', + u'https://letemps.ch/rss/site/actualite/suisse_regions' + ), (u'Actualité - Sport', u'https://letemps.ch/rss/site/actualite/sports'), + ( + u'Actualité - Sciences & Environnement', + u'https://letemps.ch/rss/site/actualite/sciences_environnement' + ), + ( + u'Actualité - Multimédia', + u'https://letemps.ch/rss/site/actualite/multimedia' + ), + (u'Actualité - Société', u'https://letemps.ch/rss/site/actualite/societe'), + ( + u'Actualité - Société | Quoi de neuf', + u'https://letemps.ch/rss/site/actualite/societe/quoi_de_neuf' + ), (u'Economie & Finance', u'https://letemps.ch/rss/site/economie_finance'), + ( + u'Economie & Finance - Finance', + u'https://letemps.ch/rss/site/economie_finance/finance' + ), + ( + u'Economie & Finance - Fonds de placement', + u'https://letemps.ch/rss/site/economie_finance/fonds_placement' + ), + ( + u'Economie & Finance - Carrières', + u'https://letemps.ch/rss/site/economie_finance/carrieres' + ), (u'Culture', u'http://letemps.ch/rss/site/culture'), + (u'Culture - Cinémas', u'https://letemps.ch/rss/site/culture/cinema'), + (u'Culture - Musiques', u'https://letemps.ch/rss/site/culture/musiques'), + (u'Culture - Scènes', u'https://letemps.ch/rss/site/culture/scenes'), + ( + u'Culture - Arts plastiques', + u'https://letemps.ch/rss/site/culture/arts_plastiques' + ), (u'Culture - Livres', u'https://letemps.ch/rss/site/culture/livres'), + (u'Lifestyle - Luxe', u'https://letemps.ch/rss/site/lifestyle/luxe'), + (u'Lifestyle - Mode', u'https://letemps.ch/rss/site/lifestyle/mode'), + ( + u'Lifestyle - Horlogerie & Joaillerie', + u'https://letemps.ch/rss/site/lifestyle/horlogerie_joaillerie' + ), (u'Lifestyle - Design', u'https://letemps.ch/rss/site/lifestyle/design'), + (u'Lifestyle - Voyages', u'https://letemps.ch/rss/site/lifestyle/voyages'), + ( + u'Lifestyle - Gastronomie', + u'https://letemps.ch/rss/site/lifestyle/gastronomie' + ), + ( + u'Lifestyle - Architecture & Immobilier', + u'https://letemps.ch/rss/site/lifestyle/architecture_immobilier' + ), + ( + u'Lifestyle - Automobile', + u'https://letemps.ch/rss/site/lifestyle/automobile' + ), (u'Opinions', u'http://letemps.ch/rss/site/opinions'), + ( + u'Opinions - Editoriaux', + u'https://letemps.ch/rss/site/opinions/editoriaux' + ), (u'Opinions - Invités', u'https://letemps.ch/rss/site/opinions/invites'), + ( + u'Opinions - Chroniques', + u'https://letemps.ch/rss/site/opinions/chroniques' + ), + (u'Opinions - Chappatte', u'https://letemps.ch/rss/site/opinions/chappatte') ] - - def parse_feeds(self): - feeds = BasicNewsRecipe.parse_feeds(self) - for feed in feeds: - # The title says it all and the description has has bad characters - # for "Le Temps" - del feed.description - return feeds - - def postprocess_html(self, soup, first): - for tag in soup.findAll('div', attrs={'class': 'box pagination'}): - tag.extract() - if not first: - h = soup.find('h1') - if h is not None: - h.extract() - return soup