mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
Update Le Temps
This commit is contained in:
parent
d2d94e4744
commit
55ff9d4a51
@ -1,19 +1,17 @@
|
|||||||
#!/usr/bin/env python2
|
#!/usr/bin/env python2
|
||||||
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
from __future__ import with_statement
|
# License: GPLv3 Copyright: 2009, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
from __future__ import print_function
|
|
||||||
|
|
||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
|
||||||
__docformat__ = 'restructuredtext en'
|
|
||||||
|
|
||||||
# -------------------------------
|
|
||||||
# Modified by Roland Kessi - February 2014
|
|
||||||
# -------------------------------
|
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
def classes(classes):
|
||||||
|
q = frozenset(classes.split(' '))
|
||||||
|
return dict(
|
||||||
|
attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class LeTemps(BasicNewsRecipe):
|
class LeTemps(BasicNewsRecipe):
|
||||||
title = u'Le Temps'
|
title = u'Le Temps'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
@ -22,179 +20,100 @@ class LeTemps(BasicNewsRecipe):
|
|||||||
description = 'French news. Needs a subscription from https://www.letemps.ch'
|
description = 'French news. Needs a subscription from https://www.letemps.ch'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
remove_javascript = True
|
remove_javascript = True
|
||||||
recursions = 1
|
|
||||||
encoding = 'UTF-8'
|
encoding = 'UTF-8'
|
||||||
match_regexps = [r'https://www.letemps.ch/Page/Uuid/[-0-9a-f]+\|[1-9]']
|
|
||||||
language = 'fr'
|
language = 'fr'
|
||||||
needs_subscription = True
|
needs_subscription = True
|
||||||
simultaneous_downloads = 5
|
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
remove_empty_feeds = True
|
remove_empty_feeds = True
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
br.open('https://www.letemps.ch/login')
|
br.open('https://www.letemps.ch/user/login')
|
||||||
br.select_form(action='/user/login')
|
br.select_form(action='/user/login')
|
||||||
br['name'] = self.username
|
br['name'] = self.username
|
||||||
br['pass'] = self.password
|
br['pass'] = self.password
|
||||||
raw = br.submit().read()
|
raw = br.submit().read()
|
||||||
if '>Login' in raw:
|
if 'href="/subscribe"' in raw:
|
||||||
raise ValueError('Failed to login to letemps.ch. Check '
|
raise ValueError(
|
||||||
'your username and password')
|
'Failed to login to letemps.ch. Check '
|
||||||
|
'your username and password'
|
||||||
|
)
|
||||||
return br
|
return br
|
||||||
|
|
||||||
def get_article_url(self, article):
|
|
||||||
'''
|
|
||||||
Override in a subclass to customize extraction of the :term:`URL` that points
|
|
||||||
to the content for each article. Return the
|
|
||||||
article URL. It is called with `article`, an object representing a parsed article
|
|
||||||
from a feed. See `feedparser <http://packages.python.org/feedparser/>`_.
|
|
||||||
By default it looks for the original link (for feeds syndicated via a
|
|
||||||
service like feedburner or pheedo) and if found,
|
|
||||||
returns that or else returns
|
|
||||||
`article.link <http://packages.python.org/feedparser/reference-entry-link.html>`_.
|
|
||||||
'''
|
|
||||||
# =======================================================================
|
|
||||||
# Avoid going through http://rss.feedsportal.com/...
|
|
||||||
# =======================================================================
|
|
||||||
for key in article.keys():
|
|
||||||
if key.endswith('_origlink'):
|
|
||||||
url = article[key]
|
|
||||||
if url and url.startswith('https://'):
|
|
||||||
print('Url is :', url)
|
|
||||||
return url
|
|
||||||
ans = article.get('link', None)
|
|
||||||
if not ans and getattr(article, 'links', None):
|
|
||||||
for item in article.links:
|
|
||||||
if item.get('rel', 'alternate') == 'alternate':
|
|
||||||
ans = item['href']
|
|
||||||
break
|
|
||||||
pos = ans.find('letemps0Bch')
|
|
||||||
if pos > -1:
|
|
||||||
ans = 'http://www.' + ans[pos:]
|
|
||||||
ans = ans.replace('0A', '0')
|
|
||||||
ans = ans.replace('0B', '.')
|
|
||||||
ans = ans.replace('0C', '/')
|
|
||||||
ans = ans.replace('0E', '-')
|
|
||||||
return ans
|
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'id': 'content'}),
|
classes('picture-cover article-content'),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='div', attrs={'id': 'html5_gallery'}),
|
dict(name=['button', 'source']),
|
||||||
dict(name='ul', attrs={'class': ['tabs', 'social-buttons cf']}),
|
classes('share related-news'),
|
||||||
dict(name='img', attrs={'class': ['bigImg']}),
|
|
||||||
dict(name='div', attrs={'class': [
|
|
||||||
'box function', 'contentInserts', 'box banner', 'related-stories',
|
|
||||||
'box additional', 'galleryOverview', 'position', 'rightAd', 'bottomAd', 'video',
|
|
||||||
]}),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
extra_css = '''
|
|
||||||
h1{font-family:"Georgia","Times New Roman",Times,serif;font-size:large;}
|
|
||||||
.headline{font-family:"Georgia","Times New Roman",Times,serif;font-size:large;color:#990000;}
|
|
||||||
.summary_gal{color:#777777;font-family:"Georgia","Times New Roman",Times,serif;font-size:x-small;}
|
|
||||||
#capt{color:#1B1B1B;font-family:"Georgia","Times New Roman",Times,serif;font-size:x-small;}
|
|
||||||
#content{font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;}
|
|
||||||
.box.article.important{font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;}
|
|
||||||
#h2 {font-size: 24px; line-height: 25px; margin-bottom: 14px; text-transform:uppercase;}
|
|
||||||
.lead {font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;font-weight: bold; margin: 10px 0;font-size:small;}
|
|
||||||
p {margin: 0 0 10px 0;}
|
|
||||||
h3{font-size:small;font-weight:bold;}
|
|
||||||
.description{font-size:x-small;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;color:white; }
|
|
||||||
a {color:#1B1B1B; font-size:small;}
|
|
||||||
.linkbox{font-size:x-small;color:#1B1B1B;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;}
|
|
||||||
|
|
||||||
h2{font-size:small;font-weight:bold;font-family:"Lucida Grande","Lucida Sans Unicode",Arial,Verdana,sans-serif;}
|
|
||||||
p.clear{clear:both;}
|
|
||||||
.heading{font-size:x-small;}
|
|
||||||
.heading strong{color:#940026;}
|
|
||||||
.box dd { clear:both; }
|
|
||||||
.box dl { position:relative; }
|
|
||||||
dl.caption {float:left;overflow:hidden;position:relative;margin: 0 10px 12px -40px;}
|
|
||||||
.caption dd p,
|
|
||||||
.caption dt img { margin-right: 0;margin-bottom: 0;}
|
|
||||||
.caption dt img {float: left;}
|
|
||||||
.caption dd {width: 100%;bottom: -1px;position: absolute;}
|
|
||||||
.caption dd .description {z-index: 2;margin-left: 0px;padding: 3px 4px;position: relative;}
|
|
||||||
'''
|
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Actualité', u'https://letemps.ch/rss/site/'),
|
(u'Actualité', u'https://letemps.ch/rss/site/'),
|
||||||
(u'Actualité - Monde',
|
(u'Actualité - Monde', u'https://letemps.ch/rss/site/actualite/monde'),
|
||||||
u'https://letemps.ch/rss/site/actualite/monde'),
|
(
|
||||||
(u'Actualité - Suisse & régions',
|
u'Actualité - Suisse & régions',
|
||||||
u'https://letemps.ch/rss/site/actualite/suisse_regions'),
|
u'https://letemps.ch/rss/site/actualite/suisse_regions'
|
||||||
(u'Actualité - Sport',
|
), (u'Actualité - Sport', u'https://letemps.ch/rss/site/actualite/sports'),
|
||||||
u'https://letemps.ch/rss/site/actualite/sports'),
|
(
|
||||||
(u'Actualité - Sciences & Environnement',
|
u'Actualité - Sciences & Environnement',
|
||||||
u'https://letemps.ch/rss/site/actualite/sciences_environnement'),
|
u'https://letemps.ch/rss/site/actualite/sciences_environnement'
|
||||||
(u'Actualité - Multimédia',
|
),
|
||||||
u'https://letemps.ch/rss/site/actualite/multimedia'),
|
(
|
||||||
(u'Actualité - Société',
|
u'Actualité - Multimédia',
|
||||||
u'https://letemps.ch/rss/site/actualite/societe'),
|
u'https://letemps.ch/rss/site/actualite/multimedia'
|
||||||
(u'Actualité - Société | Quoi de neuf',
|
),
|
||||||
u'https://letemps.ch/rss/site/actualite/societe/quoi_de_neuf'),
|
(u'Actualité - Société', u'https://letemps.ch/rss/site/actualite/societe'),
|
||||||
(u'Economie & Finance',
|
(
|
||||||
u'https://letemps.ch/rss/site/economie_finance'),
|
u'Actualité - Société | Quoi de neuf',
|
||||||
(u'Economie & Finance - Finance',
|
u'https://letemps.ch/rss/site/actualite/societe/quoi_de_neuf'
|
||||||
u'https://letemps.ch/rss/site/economie_finance/finance'),
|
), (u'Economie & Finance', u'https://letemps.ch/rss/site/economie_finance'),
|
||||||
(u'Economie & Finance - Fonds de placement',
|
(
|
||||||
u'https://letemps.ch/rss/site/economie_finance/fonds_placement'),
|
u'Economie & Finance - Finance',
|
||||||
(u'Economie & Finance - Carrières',
|
u'https://letemps.ch/rss/site/economie_finance/finance'
|
||||||
u'https://letemps.ch/rss/site/economie_finance/carrieres'),
|
),
|
||||||
(u'Culture', u'http://letemps.ch/rss/site/culture'),
|
(
|
||||||
(u'Culture - Cinémas',
|
u'Economie & Finance - Fonds de placement',
|
||||||
u'https://letemps.ch/rss/site/culture/cinema'),
|
u'https://letemps.ch/rss/site/economie_finance/fonds_placement'
|
||||||
(u'Culture - Musiques',
|
),
|
||||||
u'https://letemps.ch/rss/site/culture/musiques'),
|
(
|
||||||
(u'Culture - Scènes',
|
u'Economie & Finance - Carrières',
|
||||||
u'https://letemps.ch/rss/site/culture/scenes'),
|
u'https://letemps.ch/rss/site/economie_finance/carrieres'
|
||||||
(u'Culture - Arts plastiques',
|
), (u'Culture', u'http://letemps.ch/rss/site/culture'),
|
||||||
u'https://letemps.ch/rss/site/culture/arts_plastiques'),
|
(u'Culture - Cinémas', u'https://letemps.ch/rss/site/culture/cinema'),
|
||||||
(u'Culture - Livres',
|
(u'Culture - Musiques', u'https://letemps.ch/rss/site/culture/musiques'),
|
||||||
u'https://letemps.ch/rss/site/culture/livres'),
|
(u'Culture - Scènes', u'https://letemps.ch/rss/site/culture/scenes'),
|
||||||
(u'Lifestyle - Luxe',
|
(
|
||||||
u'https://letemps.ch/rss/site/lifestyle/luxe'),
|
u'Culture - Arts plastiques',
|
||||||
(u'Lifestyle - Mode',
|
u'https://letemps.ch/rss/site/culture/arts_plastiques'
|
||||||
u'https://letemps.ch/rss/site/lifestyle/mode'),
|
), (u'Culture - Livres', u'https://letemps.ch/rss/site/culture/livres'),
|
||||||
(u'Lifestyle - Horlogerie & Joaillerie',
|
(u'Lifestyle - Luxe', u'https://letemps.ch/rss/site/lifestyle/luxe'),
|
||||||
u'https://letemps.ch/rss/site/lifestyle/horlogerie_joaillerie'),
|
(u'Lifestyle - Mode', u'https://letemps.ch/rss/site/lifestyle/mode'),
|
||||||
(u'Lifestyle - Design',
|
(
|
||||||
u'https://letemps.ch/rss/site/lifestyle/design'),
|
u'Lifestyle - Horlogerie & Joaillerie',
|
||||||
(u'Lifestyle - Voyages',
|
u'https://letemps.ch/rss/site/lifestyle/horlogerie_joaillerie'
|
||||||
u'https://letemps.ch/rss/site/lifestyle/voyages'),
|
), (u'Lifestyle - Design', u'https://letemps.ch/rss/site/lifestyle/design'),
|
||||||
(u'Lifestyle - Gastronomie',
|
(u'Lifestyle - Voyages', u'https://letemps.ch/rss/site/lifestyle/voyages'),
|
||||||
u'https://letemps.ch/rss/site/lifestyle/gastronomie'),
|
(
|
||||||
(u'Lifestyle - Architecture & Immobilier',
|
u'Lifestyle - Gastronomie',
|
||||||
u'https://letemps.ch/rss/site/lifestyle/architecture_immobilier'),
|
u'https://letemps.ch/rss/site/lifestyle/gastronomie'
|
||||||
(u'Lifestyle - Automobile',
|
),
|
||||||
u'https://letemps.ch/rss/site/lifestyle/automobile'),
|
(
|
||||||
(u'Opinions', u'http://letemps.ch/rss/site/opinions'),
|
u'Lifestyle - Architecture & Immobilier',
|
||||||
(u'Opinions - Editoriaux',
|
u'https://letemps.ch/rss/site/lifestyle/architecture_immobilier'
|
||||||
u'https://letemps.ch/rss/site/opinions/editoriaux'),
|
),
|
||||||
(u'Opinions - Invités',
|
(
|
||||||
u'https://letemps.ch/rss/site/opinions/invites'),
|
u'Lifestyle - Automobile',
|
||||||
(u'Opinions - Chroniques',
|
u'https://letemps.ch/rss/site/lifestyle/automobile'
|
||||||
u'https://letemps.ch/rss/site/opinions/chroniques'),
|
), (u'Opinions', u'http://letemps.ch/rss/site/opinions'),
|
||||||
(u'Opinions - Chappatte',
|
(
|
||||||
u'https://letemps.ch/rss/site/opinions/chappatte')
|
u'Opinions - Editoriaux',
|
||||||
|
u'https://letemps.ch/rss/site/opinions/editoriaux'
|
||||||
|
), (u'Opinions - Invités', u'https://letemps.ch/rss/site/opinions/invites'),
|
||||||
|
(
|
||||||
|
u'Opinions - Chroniques',
|
||||||
|
u'https://letemps.ch/rss/site/opinions/chroniques'
|
||||||
|
),
|
||||||
|
(u'Opinions - Chappatte', u'https://letemps.ch/rss/site/opinions/chappatte')
|
||||||
]
|
]
|
||||||
|
|
||||||
def parse_feeds(self):
|
|
||||||
feeds = BasicNewsRecipe.parse_feeds(self)
|
|
||||||
for feed in feeds:
|
|
||||||
# The title says it all and the description has has bad characters
|
|
||||||
# for "Le Temps"
|
|
||||||
del feed.description
|
|
||||||
return feeds
|
|
||||||
|
|
||||||
def postprocess_html(self, soup, first):
|
|
||||||
for tag in soup.findAll('div', attrs={'class': 'box pagination'}):
|
|
||||||
tag.extract()
|
|
||||||
if not first:
|
|
||||||
h = soup.find('h1')
|
|
||||||
if h is not None:
|
|
||||||
h.extract()
|
|
||||||
return soup
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user