calibre/recipes/liberation.recipe
un-pogaz 41cee6f02d various whitespace (auto-fix)
ruff 'E201,E202,E211,E251,E275'
2025-01-24 11:14:24 +01:00

168 lines
7.6 KiB
Python

#!/usr/bin/env python
# vim:fileencoding=utf-8
'''
liberation.fr
'''
import base64
import json
import time
from datetime import datetime, timedelta
from urllib.parse import quote, urlencode, urlparse
from mechanize import Request
from calibre import browser
from calibre.web.feeds.news import BasicNewsRecipe
def resize(x):
    '''
    Select one URL from a mapping of resized image URLs.

    x maps a rendition key to a URL. The value of the first entry (in the
    mapping's iteration order) whose key contains '_750' is returned; when no
    key matches the result is None.
    '''
    # presumably '_750' marks the 750-pixel-wide rendition -- confirm against the API
    return next((url for name, url in x.items() if '_750' in name), None)
# French month names keyed by month number (1 = January ... 12 = December).
m_fr = dict(enumerate(
    (
        'Janvier', 'Février', 'Mars', 'Avril',
        'Mai', 'Juin', 'Juillet', 'Août',
        'Septembre', 'Octobre', 'Novembre', 'Décembre',
    ),
    start=1,
))
def json_to_html(raw):
    '''
    Render one article, given as JSON text, into a standalone HTML document.

    raw is the JSON document (str or bytes) describing a single article. The
    returned string is an <html> document containing, in order: the headline,
    the sub-headline, a byline with the last-update date, an optional lead
    image and the body built from the 'content_elements' entries of type
    text, image, header, list and oembed_response. Entries of any other type
    are ignored.

    Optional parts of the document (sub-headline, credits, promo image,
    captions, individual element fields) may be absent or null; they are then
    rendered empty or skipped. Images for which resize() finds no URL are
    skipped instead of producing a broken <img> tag.

    Raises KeyError when the headline or 'last_updated_date' is missing and
    ValueError when the date is not in ISO 8601 format.
    '''
    data = json.loads(raw)

    title = '<h1>' + data['headlines']['basic'] + '</h1>\n'
    sub = '<p class="desc">' + ((data.get('subheadlines') or {}).get('basic') or '') + '</p>'

    # The timestamp ends in 'Z' (UTC), which older fromisoformat() versions
    # reject. Replacing it with an explicit offset gives an aware datetime that
    # astimezone() converts to the system's local time, DST included.
    # time.timezone must not be added for this: it counts seconds *west* of
    # UTC, so adding it shifts the date in the wrong direction.
    stamp = data['last_updated_date']
    if stamp.endswith('Z'):
        stamp = stamp[:-1] + '+00:00'
    dt = datetime.fromisoformat(stamp).astimezone()
    # Only the numeric fields go through strftime(); the accented month name
    # is concatenated separately.
    date = m_fr[dt.month] + dt.strftime(' %d, %Y')

    credits = (data.get('credits') or {}).get('by') or []
    names = [x['name'] for x in credits if x.get('name')]
    byline = (', '.join(names) + ' | ' + date) if names else date
    auth = '<p class="auth">{}</p>\n'.format(byline)

    figure = '<br><img src="{}"><div class="figc">{}</div>\n'

    lede = ''
    promo = (data.get('promo_items') or {}).get('basic') or {}
    if promo.get('type') == 'image':
        src = resize(promo.get('resized_image_urls') or {})
        if src:
            lede = figure.format(src, promo.get('caption') or '')

    # Collect the fragments and join once at the end.
    parts = []
    for c in data.get('content_elements') or []:
        kind = c.get('type', '')
        if kind == 'text':
            parts.append('\t<p>' + (c.get('content') or '') + '</p>\n')
        elif kind == 'image':
            src = resize(c.get('resized_image_urls') or {})
            if src:
                parts.append('\t' + figure.format(src, c.get('caption') or ''))
        elif kind == 'header':
            parts.append('\t<h4>' + (c.get('content') or '') + '</h4>\n')
        elif kind == 'list':
            items = ''.join(
                '<li>' + item['content'] + '</li>'
                for item in c.get('items') or []
                if item.get('content')
            )
            parts.append('\t<ul>' + items + '\t</ul>')
        elif kind == 'oembed_response':
            embed = (c.get('raw_oembed') or {}).get('html')
            if embed:
                parts.append(embed)
    body = ''.join(parts)

    return '<html><body><div>\n' + title + sub + auth + lede + body + '\n</div></body></html>'
class Liberation(BasicNewsRecipe):
    # Recipe for the French daily Libération (liberation.fr). The sections are
    # listed from the RSS feeds below; each article is then fetched as JSON
    # from the content API in get_obfuscated_article() and rendered with
    # json_to_html().

    title = 'Libération'
    __author__ = 'unkn0wn'
    description = (
        "Libération est un quotidien d'information libre, vigilant et engagé. L'objectif de Libération est de "
        'fournir une information complète et vérifiée, dans tous les domaines. Sans préjugés, ni complaisance, '
        "ses enquêtes reportages et analyses s'emploient à comprendre et à décrire l'actualité et à révéler "
        'les mutations des sociétés et des cultures.'
    )
    language = 'fr'
    # Default article age limit in days; __init__ may replace it with the
    # user-supplied 'days' option.
    oldest_article = 1.15
    remove_empty_feeds = True
    # NOTE(review): per the calibre recipe API this flag routes every article
    # through get_obfuscated_article() -- confirm against the calibre docs.
    articles_are_obfuscated = True
    # Date shown next to the title, with the month name in French.
    timefmt = ' [{}]'.format(datetime.now().strftime(m_fr[datetime.now().month] + ' %d, %Y'))
    ignore_duplicate_articles = {'title', 'url'}
    # Base64-encoded value sent (after decoding) as the x-api-key header.
    key = 'ZWplZVBlaW5nZWl0YWVnaG8zd2VlbmdlZXlvaHB1'
    masthead_url = 'https://journal.liberation.fr/img/logo.svg'

    extra_css = '''
.desc { font-style:italic; color:#202020; }
.auth { font-size:small; }
.figc { font-size:small; text-align:center; }
blockquote { color:#202020; }
'''

    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    def __init__(self, *args, **kwargs):
        '''Set up the base recipe, then apply the 'days' option when it is a non-empty string.'''
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)

    feeds = [
        ('A la une', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/collection/accueil-une/?outputType=xml'),
        ('Politique', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/politique/?outputType=xml'),
        ('International', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/international/?outputType=xml'),
        ('CheckNews', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/checknews/?outputType=xml'),
        ('Culture', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/culture/?outputType=xml'),
        ('Idées et Débats', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/idees-et-debats/?outputType=xml'),
        ('Société', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/societe/?outputType=xml'),
        ('Environnement', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/environnement/?outputType=xml'),
        ('Economie', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/economie/?outputType=xml'),
        ('Lifestyle', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/lifestyle/?outputType=xml'),
        ('Portraits', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/portraits/?outputType=xml'),
        ('Sports', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/sports/?outputType=xml'),
        ('Sciences', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/sciences/?outputType=xml'),
        ('Forums & événements', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/forums/?outputType=xml'),
        ('Libération', 'https://www.liberation.fr/arc/outboundfeeds/rss/?outputType=xml')
    ]

    def get_obfuscated_article(self, url):
        '''
        Fetch one article from the content API and return it rendered as HTML.

        url is the article URL; only its path component is sent to the API,
        as the 'website_url' query parameter. The result is a dict holding the
        HTML produced by json_to_html() under 'data' and the unchanged url
        under 'url'.
        '''
        article_path = urlparse(url).path
        br = browser()
        api_key = base64.b64decode(self.key).decode()

        params = {
            'website': 'liberation',
            'website_url': str(article_path),
            'published': 'true',
            '_sourceInclude': '_id,content_restrictions.content_code,credits,promo_items.basic.caption,promo_items.basic.credits,promo_items.basic.url,promo_items.basic.height,promo_items.basic.width,promo_items.basic.resized_image_urls,promo_items.basic.last_updated_date,promo_items.lead_art.caption,promo_items.lead_art.credits,promo_items.lead_art.url,promo_items.lead_art.height,promo_items.lead_art.width,promo_items.lead_art.resized_image_urls,promo_items.lead_art.last_updated_date,source.additional_properties.legacy_url,content_elements,source.source_id,taxonomy.primary_section.additional_properties.original._admin.alias_ids,taxonomy.primary_section.additional_properties.original.navigation.nav_title,taxonomy.primary_section._id,taxonomy.primary_section.name,taxonomy.primary_section.path,taxonomy.tags,label,subheadlines.basic,headlines.basic,source.additional_properties.legacy_url,source.source_type,first_publish_date,display_date,canonical_url'  # noqa: E501
        }
        # '(', ')' and '!' are left unescaped in the query string.
        endpoint = 'https://arc.api.liberation.fr/content/v4/?' + urlencode(params, safe='()!', quote_via=quote)
        request = Request(
            url=endpoint,
            headers={
                'cache-control': 'public, max-age=5',
                'x-api-key': api_key,
                'accept-encoding': 'gzip',
                'user-agent': 'okhttp/4.11.0'
            }
        )
        payload = br.open(request).read()
        return {'data': json_to_html(payload), 'url': url}

    def get_cover_url(self):
        '''
        Return the cover image URL found on journal.liberation.fr.

        The first <img> whose class is 'ui image' is used and 'https:' is
        prefixed to its src attribute. Returns None when no such image exists.
        '''
        page = self.index_to_soup('https://journal.liberation.fr/')
        img = page.find(name='img', attrs={'class': 'ui image'})
        if not img:
            return None
        # presumably src is protocol-relative ('//host/path') -- verify on the live page
        return 'https:' + img['src']