#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
liberation.fr
'''

import base64
import json
import time
from datetime import datetime, timedelta
from urllib.parse import quote, urlencode, urlparse

from mechanize import Request

from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe

# NOTE(review): the copy of this recipe that was reviewed had lost the HTML
# tags inside its string literals. The markup in json_to_html() below was
# reconstructed from the CSS class names declared in Liberation.extra_css
# (.desc, .auth, .figc) and should be confirmed against the upstream recipe.


def resize(x):
    '''
    Return the value of the first entry of mapping ``x`` whose key contains
    ``'_750'``, or ``None`` when no key matches.
    '''
    for k, v in x.items():
        if '_750' in k:
            return v
    return None


# French month names, keyed by month number (1-12).
m_fr = {
    1: 'Janvier',
    2: 'Février',
    3: 'Mars',
    4: 'Avril',
    5: 'Mai',
    6: 'Juin',
    7: 'Juillet',
    8: 'Août',
    9: 'Septembre',
    10: 'Octobre',
    11: 'Novembre',
    12: 'Décembre'
}


def _fr_date(dt):
    '''
    Format ``dt`` as ``'<French month name> DD, YYYY'``.

    Only the numeric part goes through strftime; the month name is joined
    afterwards so that non-ASCII names never pass through the platform's
    strftime implementation.
    '''
    return '{} {}'.format(m_fr[dt.month], dt.strftime('%d, %Y'))


def _image_html(urls, caption):
    '''Return an <img> followed by its caption for one image element.'''
    # Two placeholders for two arguments: image URL first, caption second.
    return '<img src="{}"><div class="figc">{}</div>\n'.format(resize(urls), caption)


def json_to_html(raw):
    '''
    Convert the JSON document returned by the content API into an HTML page.

    ``raw`` is the JSON text (str or bytes). The keys read are ``headlines``,
    ``subheadlines``, ``last_updated_date``, ``credits``, ``promo_items`` and
    ``content_elements``; a missing mandatory key raises ``KeyError``.

    Returns the article as an HTML string.
    '''
    data = json.loads(raw)
    title = '<h1>' + data['headlines']['basic'] + '</h1>\n'
    sub = '<p class="desc">' + data['subheadlines']['basic'] + '</p>'

    # The last character of the timestamp is dropped exactly as before
    # (presumably a trailing 'Z' -- TODO confirm) and the value is parsed as
    # UTC, then converted to local time. The previous code added
    # time.timezone, which is expressed in seconds WEST of UTC, so it moved
    # the time in the wrong direction and also ignored daylight saving time.
    stamp = data['last_updated_date'][:-1] + '+00:00'
    when = _fr_date(datetime.fromisoformat(stamp).astimezone())

    names = [x['name'] for x in data['credits']['by']]
    byline = ', '.join(names) + ' | ' + when if names else when
    auth = '<p class="auth">{}</p>\n'.format(byline)

    lede = ''
    promo = data['promo_items']['basic']
    if promo.get('type', '') == 'image':
        lede = _image_html(promo['resized_image_urls'], promo.get('caption', ''))

    parts = []
    for c in data['content_elements']:
        kind = c.get('type', '')
        if kind == 'text':
            parts.append('\t<p>' + c['content'] + '</p>\n')
        elif kind == 'image':
            parts.append('\t' + _image_html(c['resized_image_urls'], c.get('caption', '')))
        elif kind == 'header':
            # NOTE(review): heading level is a reconstruction -- confirm.
            parts.append('\t<h4>' + c['content'] + '</h4>\n')
        elif kind == 'list':
            # NOTE(review): the markup emitted for list elements could not be
            # recovered; only the leading tab survived, so it is kept as-is.
            parts.append('\t')
        elif kind == 'oembed_response':
            embed = (c.get('raw_oembed') or {}).get('html')
            if embed:
                parts.append(embed)
    body = ''.join(parts)

    return '<html><body><div>\n' + title + sub + auth + lede + body + '\n</div></body></html>'


class Liberation(BasicNewsRecipe):
    '''Recipe that builds an e-book from the RSS feeds of liberation.fr.'''

    title = 'Libération'
    __author__ = 'unkn0wn'
    description = (
        "Libération est un quotidien d'information libre, vigilant et engagé. L'objectif de Libération est de "
        'fournir une information complète et vérifiée, dans tous les domaines. Sans préjugés, ni complaisance, '
        "ses enquêtes reportages et analyses s'emploient à comprendre et à décrire l'actualité et à révéler "
        'les mutations des sociétés et des cultures.'
    )
    language = 'fr'
    oldest_article = 1.15
    remove_empty_feeds = True
    articles_are_obfuscated = True
    # A single now() call, so month and day always come from the same instant.
    timefmt = ' [%s]' % _fr_date(datetime.now())
    ignore_duplicate_articles = {'title', 'url'}
    # Base64-encoded value sent as the 'x-api-key' request header.
    key = 'ZWplZVBlaW5nZWl0YWVnaG8zd2VlbmdlZXlvaHB1'
    masthead_url = 'https://journal.liberation.fr/img/logo.svg'

    extra_css = '''
        .desc { font-style:italic; color:#202020; }
        .auth { font-size:small; }
        .figc { font-size:small; text-align:center; }
        blockquote { color:#202020; }
    '''

    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    def __init__(self, *args, **kwargs):
        '''Initialise the recipe and apply the user-supplied 'days' option.'''
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        d = self.recipe_specific_options.get('days')
        # Only a plain string is treated as an override; the class-level
        # default above is a dict and is therefore left alone.
        if d and isinstance(d, str):
            self.oldest_article = float(d)

    feeds = [
        ('A la une', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/collection/accueil-une/?outputType=xml'),
        ('Politique', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/politique/?outputType=xml'),
        ('International', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/international/?outputType=xml'),
        ('CheckNews', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/checknews/?outputType=xml'),
        ('Culture', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/culture/?outputType=xml'),
        ('Idées et Débats', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/idees-et-debats/?outputType=xml'),
        ('Société', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/societe/?outputType=xml'),
        ('Environnement', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/environnement/?outputType=xml'),
        ('Economie', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/economie/?outputType=xml'),
        ('Lifestyle', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/lifestyle/?outputType=xml'),
        ('Portraits', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/portraits/?outputType=xml'),
        ('Sports', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/sports/?outputType=xml'),
        ('Sciences', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/sciences/?outputType=xml'),
        ('Forums & événements', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/forums/?outputType=xml'),
        ('Libération', 'https://www.liberation.fr/arc/outboundfeeds/rss/?outputType=xml')
    ]

    def get_obfuscated_article(self, url):
        '''
        Fetch the article at ``url`` through the JSON content API.

        The path component of ``url`` is sent as the ``website_url`` query
        parameter; the JSON response is converted with json_to_html().

        Returns a dict with the generated HTML under 'data' and the original
        address under 'url'.
        '''
        slug = urlparse(url).path
        br = browser()
        api_key = base64.b64decode(self.key).decode()
        # Field list requested from the API, kept in the original order
        # (including the repeated legacy_url entry) so the query string is
        # unchanged.
        source_fields = (
            '_id',
            'content_restrictions.content_code',
            'credits',
            'promo_items.basic.caption',
            'promo_items.basic.credits',
            'promo_items.basic.url',
            'promo_items.basic.height',
            'promo_items.basic.width',
            'promo_items.basic.resized_image_urls',
            'promo_items.basic.last_updated_date',
            'promo_items.lead_art.caption',
            'promo_items.lead_art.credits',
            'promo_items.lead_art.url',
            'promo_items.lead_art.height',
            'promo_items.lead_art.width',
            'promo_items.lead_art.resized_image_urls',
            'promo_items.lead_art.last_updated_date',
            'source.additional_properties.legacy_url',
            'content_elements',
            'source.source_id',
            'taxonomy.primary_section.additional_properties.original._admin.alias_ids',
            'taxonomy.primary_section.additional_properties.original.navigation.nav_title',
            'taxonomy.primary_section._id',
            'taxonomy.primary_section.name',
            'taxonomy.primary_section.path',
            'taxonomy.tags',
            'label',
            'subheadlines.basic',
            'headlines.basic',
            'source.additional_properties.legacy_url',
            'source.source_type',
            'first_publish_date',
            'display_date',
            'canonical_url',
        )
        query = {
            'website': 'liberation',
            'website_url': str(slug),
            'published': 'true',
            '_sourceInclude': ','.join(source_fields)
        }
        headers = {
            'cache-control': 'public, max-age=5',
            'x-api-key': api_key,
            'accept-encoding': 'gzip',
            'user-agent': 'okhttp/4.11.0'
        }
        api = 'https://arc.api.liberation.fr/content/v4/?' + urlencode(query, safe='()!', quote_via=quote)
        rq = Request(url=api, headers=headers)
        raw = br.open(rq).read()
        return {'data': json_to_html(raw), 'url': url}

    def get_cover_url(self):
        '''
        Return the cover image URL found on journal.liberation.fr, or
        ``None`` when no ``<img class="ui image">`` element is present.
        '''
        soup = self.index_to_soup('https://journal.liberation.fr/')
        cover = soup.find(name='img', attrs={'class': 'ui image'})
        if cover:
            # 'https:' is prepended to the src attribute, as in the original.
            return 'https:' + cover['src']
        return None