mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'patch-1' of https://github.com/claudehenchoz/calibre
This commit is contained in:
commit
7b16d17808
@ -1,66 +1,61 @@
|
|||||||
__license__ = 'GPL v3'
|
|
||||||
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>, 2012 Bernd Leinfelder <skoll1975@gmail.com>'
|
|
||||||
|
|
||||||
'''
|
|
||||||
www.nzz.ch
|
|
||||||
'''
|
|
||||||
|
|
||||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
class Nzz(BasicNewsRecipe):
|
class Nzz(BasicNewsRecipe):
|
||||||
title = 'NZZ Online'
|
title = 'NZZ'
|
||||||
__author__ = 'Darko Miletic, Bernd Leinfelder'
|
__author__ = 'Claude Henchoz'
|
||||||
description = 'Laufend aktualisierte Nachrichten, Analysen und Hintergruende zu Politik, Wirtschaft, Kultur und Sport'
|
description = 'Neue Zürcher Zeitung'
|
||||||
publisher = 'NZZ AG'
|
publisher = 'Neue Zürcher Zeitung'
|
||||||
category = 'news, politics, nachrichten, Switzerland'
|
category = 'news, politics'
|
||||||
oldest_article = 2
|
|
||||||
max_articles_per_feed = 100
|
|
||||||
no_stylesheets = True
|
|
||||||
encoding = 'utf-8'
|
|
||||||
use_embedded_content = False
|
|
||||||
language = 'de'
|
language = 'de'
|
||||||
extra_css = """
|
no_stylesheets = True
|
||||||
body{font-family: Georgia,"Times New Roman",Times,serif }
|
use_embedded_content = False
|
||||||
.artikel h3,.artikel h4,.bildLegende,.question,.autor{font-family: Arial,Verdana,Helvetica,sans-serif}
|
encoding = 'utf-8'
|
||||||
.bildLegende{font-size: small}
|
timefmt = ' [%a, %d %b %Y %H:%M:%S %z]'
|
||||||
.autor{font-size: 0.9375em; color: #666666}
|
|
||||||
.quote{font-size: large !important;
|
|
||||||
font-style: italic;
|
|
||||||
font-weight: normal !important;
|
|
||||||
border-bottom: 1px dotted #BFBFBF;
|
|
||||||
border-top: 1px dotted #BFBFBF;
|
|
||||||
line-height: 1.25em}
|
|
||||||
.quelle{color: #666666; font-style: italic; white-space: nowrap}
|
|
||||||
"""
|
|
||||||
|
|
||||||
conversion_options = {
|
scale_news_images = (600, 400)
|
||||||
'comments': description, 'tags': category, 'language': language, 'publisher': publisher
|
scale_news_images_to_device = True
|
||||||
}
|
|
||||||
|
masthead_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/3/37/Neue_Z%C3%BCrcher_Zeitung.svg/800px-Neue_Z%C3%BCrcher_Zeitung.svg.png'
|
||||||
|
|
||||||
|
keep_only_tags = [dict(name='section', attrs={'class': 'container--article'})]
|
||||||
|
|
||||||
remove_attributes = ['width', 'height', 'lang']
|
|
||||||
remove_tags_before = dict(id='main')
|
|
||||||
remove_tags_after = dict(id='articleBodyText')
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['object', 'link', 'base', 'meta', 'iframe']), dict(
|
dict(name='div', attrs={'class': 'progressbar__wrapper'}),
|
||||||
id='social-media-floater'), dict(name='div', attrs={'class': ['box']})
|
dict(name='div', attrs={'class': 'headline__meta'}),
|
||||||
|
dict(name='figcaption', attrs={'class': 'articlecomponent__description'}),
|
||||||
|
dict(name='div', attrs={'class': 'nzzinteraction'}),
|
||||||
|
dict(name='section', attrs={'class': 'nzzinteraction'}),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
remove_attributes = ['style', 'font', 'class']
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
|
('Neueste Artikel', 'https://www.nzz.ch/recent.rss'),
|
||||||
(u'International', u'http://www.nzz.ch/aktuell/international.rss'),
|
('Topthemen der Startseite', 'https://www.nzz.ch/startseite.rss'),
|
||||||
(u'Schweiz', u'http://www.nzz.ch/aktuell/schweiz.rss'),
|
('International', 'https://www.nzz.ch/international.rss'),
|
||||||
(u'Wirtschaft', u'http://www.nzz.ch/aktuell/wirtschaft/uebersicht.rss'),
|
('Schweiz', 'https://www.nzz.ch/schweiz.rss'),
|
||||||
(u'Finanzmaerkte', u'http://www.nzz.ch/finanzen/uebersicht/finanznachrichten.rss'),
|
('Wirtschaft', 'https://www.nzz.ch/wirtschaft.rss'),
|
||||||
(u'Zuerich', u'http://www.nzz.ch/aktuell/zuerich/uebersicht.rss'),
|
('Finanznachrichten', 'https://www.nzz.ch/finanzen.rss'),
|
||||||
(u'Sport', u'http://www.nzz.ch/aktuell/sport/uebersicht.rss'),
|
('Kultur', 'https://www.nzz.ch/feuilleton.rss'),
|
||||||
(u'Panorama', u'http://www.nzz.ch/aktuell/panorama.rss'),
|
('Sport', 'https://www.nzz.ch/sport.rss'),
|
||||||
(u'Kultur', u'http://www.nzz.ch/aktuell/feuilleton/uebersicht.rss'),
|
('Zürich', 'https://www.nzz.ch/zuerich.rss'),
|
||||||
(u'Wissenschaft', u'http://www.nzz.ch/wissen/uebersicht.rss'),
|
('Panorama', 'https://www.nzz.ch/panorama.rss'),
|
||||||
(u'Reisen', u'http://www.nzz.ch/lebensart/reisen-freizeit.rss'),
|
('Wissenschaft', 'https://www.nzz.ch/wissenschaft.rss'),
|
||||||
(u'Auto Mobil', u'http://www.nzz.ch/lebensart/auto-mobil.rss'),
|
('Auto', 'https://www.nzz.ch/mobilitaet/auto-mobil.rss'),
|
||||||
(u'Digital', u'http://www.nzz.ch/lebensart/digital.rss'),
|
('Technologie', 'https://www.nzz.ch/technologie.rss'),
|
||||||
(u'Stil', u'http://www.nzz.ch/lebensart/stil.rss'),
|
|
||||||
(u'Wein-Keller', u'http://www.nzz.ch/lebensart/wein-keller.rss')
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def get_browser(self, *args, **kwargs):
|
||||||
|
kwargs['user_agent'] = 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)'
|
||||||
|
br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
|
||||||
|
br.addheaders += [
|
||||||
|
('Referer', 'https://www.google.com/'),
|
||||||
|
('X-Forwarded-For', '66.249.66.1')
|
||||||
|
]
|
||||||
|
return br
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
|
||||||
|
# Fix lazy-loading images
|
||||||
|
for img in soup.findAll('img', attrs={'srcset': True}):
|
||||||
|
img['src'] = img['srcset'].split()[0]
|
||||||
|
return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user