mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
168 lines
7.6 KiB
Python
168 lines
7.6 KiB
Python
#!/usr/bin/env python
|
|
# vim:fileencoding=utf-8
|
|
'''
|
|
liberation.fr
|
|
'''
|
|
|
|
import base64
|
|
import json
|
|
import time
|
|
from datetime import datetime, timedelta
|
|
from urllib.parse import quote, urlencode, urlparse
|
|
|
|
from mechanize import Request
|
|
|
|
from calibre import browser
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
def resize(x):
    '''
    Return the value of the first entry in mapping ``x`` whose key contains
    ``'_750'``, or None when no key matches.

    Callers in this file pass an image's ``resized_image_urls`` mapping
    (presumably rendition name -> URL; the exact key format is not visible here).
    '''
    return next((url for label, url in x.items() if '_750' in label), None)
|
|
|
|
|
|
# French month names keyed by month number (1-12); used when formatting the
# dates shown in article bylines and in the recipe's time format.
m_fr = dict(enumerate(
    (
        'Janvier', 'Février', 'Mars', 'Avril',
        'Mai', 'Juin', 'Juillet', 'Août',
        'Septembre', 'Octobre', 'Novembre', 'Décembre',
    ),
    start=1,
))
|
|
|
|
|
|
def _figure_html(element, prefix=''):
    '''Return ``<img>`` plus caption markup for an image element, or '' if it has no usable URL.'''
    src = resize(element.get('resized_image_urls') or {})
    if not src:
        # resize() returns None when no rendition key matches; skip the figure
        # instead of emitting <img src="None">.
        return ''
    return prefix + '<br><img src="{}"><div class="figc">{}</div>\n'.format(
        src, element.get('caption') or ''  # a null caption must not render as "None"
    )


def _local_date_fr(data):
    '''Return the article date as "<French month> DD, YYYY" in local time, or '' if there is none.'''
    from datetime import timezone  # local import: only this helper needs it

    # last_updated_date is what was always displayed; the other two are the
    # date fields the API request explicitly asks for, used as fallbacks.
    stamp = data.get('last_updated_date') or data.get('display_date') or data.get('first_publish_date')
    if not stamp:
        return ''
    if stamp.endswith('Z'):
        # Strip the trailing UTC designator, which datetime.fromisoformat()
        # rejects on older Python versions.
        stamp = stamp[:-1]
    dt = datetime.fromisoformat(stamp)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    # astimezone() converts to the system local zone, DST included. The previous
    # "+ timedelta(seconds=time.timezone)" shifted the wrong way, because
    # time.timezone is expressed in seconds *west* of UTC.
    dt = dt.astimezone()
    return m_fr[dt.month] + dt.strftime(' %d, %Y')


def _element_html(c):
    '''Return the HTML for one ``content_elements`` entry ('' for unhandled types).'''
    kind = c.get('type', '')
    if kind == 'text':
        return '\t<p>' + (c.get('content') or '') + '</p>\n'
    if kind == 'image':
        return _figure_html(c, '\t')
    if kind == 'header':
        return '\t<h4>' + (c.get('content') or '') + '</h4>\n'
    if kind == 'list':
        entries = ''.join(
            '<li>' + item['content'] + '</li>'
            for item in c.get('items') or ()
            if 'content' in item
        )
        return '\t<ul>' + entries + '\t</ul>'
    if kind == 'oembed_response':
        # Embedded third-party markup is passed through unchanged.
        return (c.get('raw_oembed') or {}).get('html') or ''
    return ''


def json_to_html(raw):
    '''
    Convert an article's JSON document into a small standalone HTML page.

    :param raw: JSON text (str or bytes) describing one article. The keys read
        are ``headlines.basic``, ``subheadlines.basic``, ``credits.by[].name``,
        the date fields, ``promo_items.basic`` and ``content_elements``.
    :return: HTML string of the form ``<html><body><div>...</div></body></html>``
        holding the title, sub-headline, byline/date line, lead image and body.
    :raises ValueError: if ``raw`` is not valid JSON or a date cannot be parsed.
    :raises KeyError: if ``headlines.basic`` is missing. This lookup is kept
        strict on purpose so a payload that is not an article fails loudly
        rather than producing an empty page. All other fields are optional.
    '''
    data = json.loads(raw)

    title = '<h1>' + data['headlines']['basic'] + '</h1>\n'
    sub = '<p class="desc">' + ((data.get('subheadlines') or {}).get('basic') or '') + '</p>'

    contributors = (data.get('credits') or {}).get('by') or ()
    names = ', '.join(x['name'] for x in contributors if x.get('name'))
    # "Author, Author | date", or just whichever of the two parts is available.
    byline = ' | '.join(part for part in (names, _local_date_fr(data)) if part)
    auth = '<p class="auth">{}</p>\n'.format(byline)

    promo = (data.get('promo_items') or {}).get('basic') or {}
    lede = _figure_html(promo) if promo.get('type', '') == 'image' else ''

    body = ''.join(_element_html(c) for c in data.get('content_elements') or ())

    return '<html><body><div>\n' + title + sub + auth + lede + body + '\n</div></body></html>'
|
|
|
|
|
|
class Liberation(BasicNewsRecipe):
    # Recipe for the French daily Libération: articles are listed from the
    # site's RSS feeds and their content is fetched as JSON from the
    # arc.api.liberation.fr endpoint (see get_obfuscated_article).

    title = 'Libération'
    __author__ = 'unkn0wn'
    description = (
        "Libération est un quotidien d'information libre, vigilant et engagé. L'objectif de Libération est de "
        'fournir une information complète et vérifiée, dans tous les domaines. Sans préjugés, ni complaisance, '
        "ses enquêtes reportages et analyses s'emploient à comprendre et à décrire l'actualité et à révéler "
        'les mutations des sociétés et des cultures.'
    )
    language = 'fr'
    # Default article age limit in days; can be overridden through the
    # 'days' recipe-specific option (see __init__).
    oldest_article = 1.15
    remove_empty_feeds = True
    # NOTE(review): presumably this is what makes the framework fetch article
    # content through get_obfuscated_article() -- confirm against the calibre docs.
    articles_are_obfuscated = True
    # Date shown as "[<French month> DD, YYYY]", evaluated when the class is defined.
    timefmt = ' [{}]'.format(datetime.now().strftime(m_fr[datetime.now().month] + ' %d, %Y'))
    ignore_duplicate_articles = {'title', 'url'}
    # Base64 text; decoded and sent as the x-api-key header in get_obfuscated_article().
    key = 'ZWplZVBlaW5nZWl0YWVnaG8zd2VlbmdlZXlvaHB1'
    masthead_url = 'https://journal.liberation.fr/img/logo.svg'
    # Styles for the classes emitted by json_to_html() (desc, auth, figc).
    extra_css = '''
        .desc { font-style:italic; color:#202020; }
        .auth { font-size:small; }
        .figc { font-size:small; text-align:center; }
        blockquote { color:#202020; }
    '''

    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    def __init__(self, *args, **kwargs):
        '''Initialise the base recipe, then apply the 'days' option if it holds a non-empty string.'''
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        days = self.recipe_specific_options.get('days')
        if isinstance(days, str) and days:
            self.oldest_article = float(days)

    # (section title, RSS feed URL) pairs.
    feeds = [
        ('A la une', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/collection/accueil-une/?outputType=xml'),
        ('Politique', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/politique/?outputType=xml'),
        ('International', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/international/?outputType=xml'),
        ('CheckNews', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/checknews/?outputType=xml'),
        ('Culture', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/culture/?outputType=xml'),
        ('Idées et Débats', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/idees-et-debats/?outputType=xml'),
        ('Société', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/societe/?outputType=xml'),
        ('Environnement', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/environnement/?outputType=xml'),
        ('Economie', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/economie/?outputType=xml'),
        ('Lifestyle', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/lifestyle/?outputType=xml'),
        ('Portraits', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/portraits/?outputType=xml'),
        ('Sports', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/sports/?outputType=xml'),
        ('Sciences', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/sciences/?outputType=xml'),
        ('Forums & événements', 'https://www.liberation.fr/arc/outboundfeeds/rss-all/category/forums/?outputType=xml'),
        ('Libération', 'https://www.liberation.fr/arc/outboundfeeds/rss/?outputType=xml')
    ]

    def get_obfuscated_article(self, url):
        '''
        Fetch the article behind ``url`` from the JSON content API and return
        it as ``{'data': <html string>, 'url': url}``.

        The path component of ``url`` is sent as the ``website_url`` query
        parameter; the JSON response is converted with json_to_html().
        '''
        # Fields requested from the API; joined with commas into _sourceInclude.
        wanted_fields = (
            '_id',
            'content_restrictions.content_code',
            'credits',
            'promo_items.basic.caption',
            'promo_items.basic.credits',
            'promo_items.basic.url',
            'promo_items.basic.height',
            'promo_items.basic.width',
            'promo_items.basic.resized_image_urls',
            'promo_items.basic.last_updated_date',
            'promo_items.lead_art.caption',
            'promo_items.lead_art.credits',
            'promo_items.lead_art.url',
            'promo_items.lead_art.height',
            'promo_items.lead_art.width',
            'promo_items.lead_art.resized_image_urls',
            'promo_items.lead_art.last_updated_date',
            'source.additional_properties.legacy_url',
            'content_elements',
            'source.source_id',
            'taxonomy.primary_section.additional_properties.original._admin.alias_ids',
            'taxonomy.primary_section.additional_properties.original.navigation.nav_title',
            'taxonomy.primary_section._id',
            'taxonomy.primary_section.name',
            'taxonomy.primary_section.path',
            'taxonomy.tags',
            'label',
            'subheadlines.basic',
            'headlines.basic',
            'source.additional_properties.legacy_url',
            'source.source_type',
            'first_publish_date',
            'display_date',
            'canonical_url',
        )
        params = {
            'website': 'liberation',
            'website_url': str(urlparse(url).path),
            'published': 'true',
            '_sourceInclude': ','.join(wanted_fields)
        }
        request_headers = {
            'cache-control': 'public, max-age=5',
            'x-api-key': base64.b64decode(self.key).decode(),
            'accept-encoding': 'gzip',
            'user-agent': 'okhttp/4.11.0'
        }
        endpoint = 'https://arc.api.liberation.fr/content/v4/?' + urlencode(params, safe='()!', quote_via=quote)

        # A fresh browser object is created for every article request.
        fetcher = browser()
        payload = fetcher.open(Request(url=endpoint, headers=request_headers)).read()
        return {'data': json_to_html(payload), 'url': url}

    def get_cover_url(self):
        '''
        Return the cover image URL taken from the first ``<img class="ui image">``
        on journal.liberation.fr (its ``src`` prefixed with ``https:``), or
        None when no such image is found.
        '''
        page = self.index_to_soup('https://journal.liberation.fr/')
        img = page.find(name='img', attrs={'class': 'ui image'})
        if not img:
            return None
        return 'https:' + img['src']
|