calibre/recipes/courrierinternational.recipe
LAntoine 1fc067b99d Fix Courrier International
Update RSS feeds
Add extra CSS
Ignore duplicate articles
2024-09-25 12:30:10 +02:00

105 lines
3.5 KiB
Python

#!/usr/bin/env python
# vim:fileencoding=utf-8
__license__ = 'GPL v3'
__copyright__ = '2009, Mathieu Godlewski <mathieu at godlewski.fr>\
2015, Rémi Vanicat <vanicat at debian.org'
'''
Courrier International
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class CourrierInternational(BasicNewsRecipe):
    '''
    Recipe that builds an e-book from the Courrier International RSS feeds.

    Login is optional: when both a username and a password are configured,
    the browser signs in through the site's login form before downloading.
    '''

    title = 'Courrier International'
    __author__ = 'Mathieu Godlewski <mathieu at godlewski.fr>'
    description = 'Global news in french from international newspapers'
    # Default age limit, in days (see the 'days' option text below).
    oldest_article = 7
    language = 'fr'

    # Declares the 'days' option; its default mirrors ``oldest_article``.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    max_articles_per_feed = 50
    ignore_duplicate_articles = {'title'}
    no_stylesheets = True
    html2lrf_options = ['--base-font-size', '10']

    # Written as a list of tag specs, like ``remove_tags`` below.
    keep_only_tags = [dict(name='article')]
    remove_tags = [
        dict(attrs={'class': [
            'asset-read-more', 'article-secondary', 'article-aside', 'item',
            'source-logo', 'source-lang', 'info-time',
        ]})
    ]

    extra_css = '''
        .article-lede {
            margin-bottom: 1em;
        }
        .caption, .credit {
            font-size: 0.9em;
            font-style: italic;
        }
        .info-date, .info-reserved {
            font-style: italic;
            font-size: 0.9em;
            display: block;
        }
        .strapline {
            font-size: 1.17em;
            font-weight: bold;
            margin: 0.67em 0;
            line-height: 1.2;
        }
    '''

    needs_subscription = "optional"
    # HTTPS so that credentials are never posted via a plain-text URL.
    login_url = 'https://www.courrierinternational.com/login'

    feeds = [
        # Some articles that require a subscription fail to download.
        ('France', 'https://www.courrierinternational.com/feed/rubrique/france/rss.xml'),
        ('Geopolitique', 'https://www.courrierinternational.com/feed/rubrique/geopolitique/rss.xml'),
        ('Economie', 'https://www.courrierinternational.com/feed/rubrique/economie/rss.xml'),
        ('Société', 'https://www.courrierinternational.com/feed/rubrique/societe/rss.xml'),
        ('Politique', 'https://www.courrierinternational.com/feed/rubrique/politique/rss.xml'),
        ('Sciences & Environnement', 'https://www.courrierinternational.com/feed/rubrique/science-environnement/rss.xml'),
        ('Culture', 'https://www.courrierinternational.com/feed/rubrique/culture/rss.xml'),
        ('Expat', 'https://www.courrierinternational.com/feed/rubrique/expat/rss.xml'),
        ('Autres', 'https://www.courrierinternational.com/feed/all/rss.xml'),
    ]

    def __init__(self, *args, **kwargs):
        '''
        Initialise the recipe and apply the 'days' option when it is a string.

        All arguments are forwarded unchanged to ``BasicNewsRecipe.__init__``.
        '''
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        d = self.recipe_specific_options.get('days')
        # The class-level declaration maps 'days' to a dict, so the str check
        # only passes once that value has been replaced by a plain string
        # (presumably a user-supplied override installed by the base
        # initialiser -- confirm against calibre).
        if d and isinstance(d, str):
            self.oldest_article = float(d)

    def get_browser(self):
        '''
        Return the base recipe browser, logged in when credentials are set.

        The login form is located by its ``id`` attribute
        (``user-login-form``) and filled through its ``name`` and ``pass``
        fields.
        '''
        def is_form_login(form):
            return "id" in form.attrs and form.attrs['id'] == "user-login-form"

        br = BasicNewsRecipe.get_browser(self)
        # The subscription is optional, so either credential may be unset;
        # only attempt the login when both are available.
        if self.username and self.password:
            br.open(self.login_url)
            br.select_form(predicate=is_form_login)
            br['name'] = self.username
            br['pass'] = self.password
            br.submit()
        return br

    def preprocess_html(self, soup):
        '''
        Turn site-relative links into absolute URLs and return ``soup``.

        Only hrefs that start with a single ``/`` are rewritten.
        Protocol-relative hrefs (``//host/path``) already name their host and
        are left untouched; prefixing them would produce a broken URL.
        '''
        for link in soup.findAll("a", href=re.compile(r'^/(?!/)')):
            link["href"] = 'https://www.courrierinternational.com' + link["href"]
        return soup