From 1f76bb161cbef4876e1bb32469967750cd72e92d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 18 Mar 2026 08:32:15 +0530 Subject: [PATCH] Telerama by Kabonix --- recipes/telerama.recipe | 189 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 recipes/telerama.recipe diff --git a/recipes/telerama.recipe b/recipes/telerama.recipe new file mode 100644 index 0000000000..b84b138077 --- /dev/null +++ b/recipes/telerama.recipe @@ -0,0 +1,189 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import absolute_import, division, print_function, unicode_literals + +__license__ = 'GPL v3' +__copyright__ = '2026, Kabonix' + +import json +import re +from urllib.parse import urlparse + +from calibre.web.feeds.news import BasicNewsRecipe + + +class TeleramaPremium(BasicNewsRecipe): + title = 'Télérama' + __author__ = 'Kabonix' + description = 'Édition complète (API Bypass) - Cover HD & Lecture Pure' + publisher = 'Télérama' + language = 'fr' + encoding = 'utf-8' + + oldest_article = 7 + max_articles_per_feed = 50 + no_stylesheets = True + ignore_duplicate_articles = {'title', 'url'} + + # On laisse les images des articles tranquilles + scale_news_images = None + + # --- API --- + headers = { + 'User-Agent': 'Telerama/4.3.5 (Android; 14)', + 'X-Lmd-Token': 'TWPLMOLMO', + 'Accept': 'application/json' + } + + def get_browser(self, *args, **kwargs): + br = BasicNewsRecipe.get_browser(self, *args, **kwargs) + for name, val in self.headers.items(): + br.addheaders.append((name, val)) + return br + + # --- COUVERTURE DYNAMIQUE (Ta demande) --- + def get_cover_url(self): + cover_url = None + try: + self.log('🔍 Recherche de la dernière couverture...') + # On va sur la page kiosque + soup = self.index_to_soup('https://www.telerama.fr/kiosque/telerama') + + # On cherche le premier élément "popin-link" avec une data-cover-url + # Le premier de la liste est toujours le dernier numéro paru + link = soup.find('a', attrs={'class': 'popin-link', 'data-cover-url': True}) + + if link: + url = link['data-cover-url'] + # L'URL contient /180/ (basse déf). On passe en HD /1200/ + # Ex: .../0/0/180/0/... -> .../0/0/1200/0/... + cover_url = url.replace('/180/', '/1200/') + self.log(f'✅ Couverture trouvée : {cover_url}') + else: + self.log('⚠️ Aucune couverture trouvée dans le kiosque.') + except Exception as e: + self.log(f'❌ Erreur récupération couverture : {e}') + + return cover_url + + # --- BYPASS API --- + def get_article_url(self, article): + url = article.get('link', article.get('url', '')) + path = urlparse(url).path + return 'https://apps.telerama.fr/tlr/v1/premium-android-phone/element?id={}'.format(path) + + # --- JSON TO HTML --- + def preprocess_raw_html(self, raw_html, url): + if '/tlr/v1/' in url: + try: + data = json.loads(raw_html) + content = '' + title = 'Télérama' + + if 'templates' in data and 'raw_content' in data['templates']: + content = data['templates']['raw_content']['content'] + elif 'body' in data: + content = data['body'] + + if 'template_vars' in data: + title = data['template_vars'].get('share_title', 'Article Télérama') + + if not content: + return '

Contenu vide

' + + # Nettoyage préventif + content = content.replace('{{{ scripts_bottom }}}', '') + content = re.sub(r'>\s*[•·]\s*<', '><', content) + + # Fix images + content = content.replace('{{width}}', '1200').replace('{{height}}', '') + content = content.replace('%7B%7Bwidth%7D%7D', '1200') + + html = f'{title}

{title}

{content}' + return html + + except Exception as e: + self.log(f'Erreur JSON : {e}') + return raw_html + return raw_html + + # --- NETTOYAGE --- + def preprocess_html(self, soup): + # 1. Suppression doublons structurels + for header in soup.find_all(attrs={'class': re.compile(r'article__page-header|header__article', re.I)}): + header.decompose() + for ns in soup.find_all('noscript'): + ns.decompose() + + # 2. Suppression "À lire aussi" + for p in soup.find_all(['p', 'h3', 'h4', 'div', 'aside']): + text = p.get_text().strip() + if re.search(r'^(À|A) lire aussi', text, re.IGNORECASE): + p.decompose() + + # 3. Nettoyage Méta TV et Puces + for tag in soup.find_all(['p', 'div', 'span', 'li', 'ul']): + text = tag.get_text().strip() + normalized_text = re.sub(r'\s+', ' ', text) + + # Puces seules + if re.match(r'^[\s\n\r•·|\-.]+$', text): + tag.decompose() + continue + # Mots clés TV seuls + if re.match(r'^(Direct|Inédit|Replay|\s)+$', normalized_text, re.IGNORECASE): + tag.decompose() + continue + # Mots clés TV avec séparateurs + if re.search(r'(Direct|Inédit|Replay)\s*[•·-]', text, re.IGNORECASE): + tag.decompose() + + # 4. SUPPRESSION DES LIENS (Lecture Pure) + for a in soup.find_all('a'): + a.unwrap() + + return soup + + keep_only_tags = [ + dict(name='h1', attrs={'id': 'main-title'}), + dict(attrs={'class': ['article__page-content', 'article-body']}), + ] + + remove_tags = [ + dict(attrs={'class': re.compile(r'paywall|premium-banner|banner|pubstack|marketing', re.I)}), + dict(attrs={'class': re.compile(r'sharing|social|bookmark|button|btn|openapp|listBtns', re.I)}), + dict(attrs={'class': re.compile(r'OUTBRAIN|forecast|overlay', re.I)}), + dict(name=['script', 'style', 'nav', 'footer', 'button', 'iframe']) + ] + + extra_css = ''' + h1 { + font-family: "Georgia", serif; + font-size: 1.5em; + font-weight: bold; + text-align: center; + margin-bottom: 0.5em; + color: #111; + } + .article__label-subscriber { + display: block; background-color: #ffe600; color: #000; font-weight: bold; + font-size: 0.8em; text-transform: uppercase; padding: 4px 8px; + margin: 0 auto 1em auto; width: fit-content; border-radius: 4px; + } + .article__chapeau { font-weight: bold; font-style: italic; margin: 1.5em 0; font-size: 1.1em; color: #444; } + p { text-align: justify; line-height: 1.5; margin-bottom: 1em; } + figure { margin: 1.5em 0; } + img { display: block; margin: 0 auto; max-width: 100%; height: auto; } + figcaption, .media__caption, .media__legend { font-size: 0.75em; color: #666; text-align: center; font-style: italic; margin-top: 0.5em; } + .author { font-weight: bold; margin-top: 2em; border-top: 1px solid #eee; padding-top: 1em; color: #333; } + a { color: inherit; text-decoration: none; pointer-events: none; } + ''' + + feeds = [ + ('À la une', 'https://www.telerama.fr/rss/une.xml'), + ('Cinéma', 'https://www.telerama.fr/rss/cinema.xml'), + ('Séries', 'https://www.telerama.fr/rss/series.xml'), + ('Télévision', 'https://www.telerama.fr/rss/television.xml'), + ('Musique', 'https://www.telerama.fr/rss/musique.xml'), + ('Livres', 'https://www.telerama.fr/rss/livres.xml'), + ]