Telerama by Kabonix

This commit is contained in:
Kovid Goyal 2026-03-18 08:32:15 +05:30
parent 049cdb9c61
commit 1f76bb161c
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

189
recipes/telerama.recipe Normal file
View File

@ -0,0 +1,189 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2026, Kabonix'
import json
import re
from urllib.parse import urlparse
from calibre.web.feeds.news import BasicNewsRecipe
class TeleramaPremium(BasicNewsRecipe):
    """Télérama recipe that builds the magazine from the premium mobile API.

    Instead of scraping the public website (which paywalls premium content),
    article URLs from the public RSS feeds are rewritten to hit the Android
    app's JSON endpoint, whose payload is then converted back to HTML.
    The cover is resolved dynamically from the kiosque page in HD.
    """

    title = 'Télérama'
    __author__ = 'Kabonix'
    description = 'Édition complète (API Bypass) - Cover HD & Lecture Pure'
    publisher = 'Télérama'
    language = 'fr'
    encoding = 'utf-8'
    oldest_article = 7
    max_articles_per_feed = 50
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}
    # Leave article images untouched (no downscaling by calibre).
    scale_news_images = None

    # --- API ---
    # Headers mimicking the official Android app so the premium API
    # accepts the requests.
    headers = {
        'User-Agent': 'Telerama/4.3.5 (Android; 14)',
        'X-Lmd-Token': 'TWPLMOLMO',
        'Accept': 'application/json',
    }

    def get_browser(self, *args, **kwargs):
        """Return the default browser with the app-impersonation headers added."""
        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        for name, val in self.headers.items():
            br.addheaders.append((name, val))
        return br

    # --- DYNAMIC COVER ---
    def get_cover_url(self):
        """Find the latest issue's cover on the kiosque page, upgraded to HD.

        Returns the cover URL, or None if it cannot be located (calibre then
        falls back to a text cover).
        """
        cover_url = None
        try:
            self.log('🔍 Recherche de la dernière couverture...')
            # The kiosque page lists issues newest-first; the first
            # "popin-link" anchor carrying a data-cover-url is the
            # latest published issue.
            soup = self.index_to_soup('https://www.telerama.fr/kiosque/telerama')
            link = soup.find('a', attrs={'class': 'popin-link', 'data-cover-url': True})
            if link:
                url = link['data-cover-url']
                # The URL embeds a /180/ (low-res) size segment;
                # swap it for /1200/ to get the HD rendition.
                cover_url = url.replace('/180/', '/1200/')
                self.log(f'✅ Couverture trouvée : {cover_url}')
            else:
                self.log('⚠️ Aucune couverture trouvée dans le kiosque.')
        except Exception as e:
            # Best effort: a missing cover must not abort the whole fetch.
            self.log(f'❌ Erreur récupération couverture : {e}')
        return cover_url

    # --- API BYPASS ---
    def get_article_url(self, article):
        """Rewrite a feed item's web URL into its premium JSON API endpoint."""
        url = article.get('link', article.get('url', ''))
        path = urlparse(url).path
        return 'https://apps.telerama.fr/tlr/v1/premium-android-phone/element?id={}'.format(path)

    # --- JSON TO HTML ---
    def preprocess_raw_html(self, raw_html, url):
        """Convert an API JSON payload into a minimal HTML document.

        Non-API URLs (and any JSON parsing failure) fall through and return
        the raw content unchanged.
        """
        if '/tlr/v1/' in url:
            try:
                data = json.loads(raw_html)
                content = ''
                title = 'Télérama'
                # The payload shape varies: prefer the templated raw
                # content, fall back to a plain 'body' field.
                if 'templates' in data and 'raw_content' in data['templates']:
                    content = data['templates']['raw_content']['content']
                elif 'body' in data:
                    content = data['body']
                if 'template_vars' in data:
                    title = data['template_vars'].get('share_title', 'Article Télérama')
                if not content:
                    return '<html><body><h2>Contenu vide</h2></body></html>'
                # Preventive cleanup: strip the mustache script placeholder
                # and bullet separators wedged between tags.
                content = content.replace('{{{ scripts_bottom }}}', '')
                content = re.sub(r'>\s*[•·]\s*<', '><', content)
                # Fix responsive-image templates: substitute a fixed width
                # (both raw and percent-encoded mustache forms appear).
                content = content.replace('{{width}}', '1200').replace('{{height}}', '')
                content = content.replace('%7B%7Bwidth%7D%7D', '1200')
                html = f'<html><head><title>{title}</title></head><body><h1 id="main-title">{title}</h1>{content}</body></html>'
                return html
            except Exception as e:
                self.log(f'Erreur JSON : {e}')
                return raw_html
        return raw_html

    # --- CLEANUP ---
    def preprocess_html(self, soup):
        """Strip duplicated headers, teasers, TV metadata and all links."""
        # 1. Remove structural duplicates (the article header is re-emitted
        #    by the template on top of our injected <h1>).
        for header in soup.find_all(attrs={'class': re.compile(r'article__page-header|header__article', re.I)}):
            header.decompose()
        for ns in soup.find_all('noscript'):
            ns.decompose()
        # 2. Remove "À lire aussi" (read-also) teaser blocks.
        for p in soup.find_all(['p', 'h3', 'h4', 'div', 'aside']):
            text = p.get_text().strip()
            if re.search(r'^(À|A) lire aussi', text, re.IGNORECASE):
                p.decompose()
        # 3. Clean TV metadata and stray bullets.
        for tag in soup.find_all(['p', 'div', 'span', 'li', 'ul']):
            text = tag.get_text().strip()
            normalized_text = re.sub(r'\s+', ' ', text)
            # Elements containing only bullets/separators.
            if re.match(r'^[\s\n\r•·|\-.]+$', text):
                tag.decompose()
                continue
            # Elements containing only TV keywords.
            if re.match(r'^(Direct|Inédit|Replay|\s)+$', normalized_text, re.IGNORECASE):
                tag.decompose()
                continue
            # TV keywords followed by a separator character.
            if re.search(r'(Direct|Inédit|Replay)\s*[•·-]', text, re.IGNORECASE):
                tag.decompose()
        # 4. Remove all hyperlinks for distraction-free reading
        #    (keep the text, drop the <a> wrapper).
        for a in soup.find_all('a'):
            a.unwrap()
        return soup

    keep_only_tags = [
        dict(name='h1', attrs={'id': 'main-title'}),
        dict(attrs={'class': ['article__page-content', 'article-body']}),
    ]

    remove_tags = [
        dict(attrs={'class': re.compile(r'paywall|premium-banner|banner|pubstack|marketing', re.I)}),
        dict(attrs={'class': re.compile(r'sharing|social|bookmark|button|btn|openapp|listBtns', re.I)}),
        dict(attrs={'class': re.compile(r'OUTBRAIN|forecast|overlay', re.I)}),
        dict(name=['script', 'style', 'nav', 'footer', 'button', 'iframe']),
    ]

    extra_css = '''
    h1 {
        font-family: "Georgia", serif;
        font-size: 1.5em;
        font-weight: bold;
        text-align: center;
        margin-bottom: 0.5em;
        color: #111;
    }
    .article__label-subscriber {
        display: block; background-color: #ffe600; color: #000; font-weight: bold;
        font-size: 0.8em; text-transform: uppercase; padding: 4px 8px;
        margin: 0 auto 1em auto; width: fit-content; border-radius: 4px;
    }
    .article__chapeau { font-weight: bold; font-style: italic; margin: 1.5em 0; font-size: 1.1em; color: #444; }
    p { text-align: justify; line-height: 1.5; margin-bottom: 1em; }
    figure { margin: 1.5em 0; }
    img { display: block; margin: 0 auto; max-width: 100%; height: auto; }
    figcaption, .media__caption, .media__legend { font-size: 0.75em; color: #666; text-align: center; font-style: italic; margin-top: 0.5em; }
    .author { font-weight: bold; margin-top: 2em; border-top: 1px solid #eee; padding-top: 1em; color: #333; }
    a { color: inherit; text-decoration: none; pointer-events: none; }
    '''

    feeds = [
        ('À la une', 'https://www.telerama.fr/rss/une.xml'),
        ('Cinéma', 'https://www.telerama.fr/rss/cinema.xml'),
        ('Séries', 'https://www.telerama.fr/rss/series.xml'),
        ('Télévision', 'https://www.telerama.fr/rss/television.xml'),
        ('Musique', 'https://www.telerama.fr/rss/musique.xml'),
        ('Livres', 'https://www.telerama.fr/rss/livres.xml'),
    ]