# Mirror of https://github.com/kovidgoyal/calibre.git
# Synced 2025-06-23 15:30:45 -04:00
# 61 lines, 2.6 KiB, plaintext
__license__ = 'GPL v3'
|
|
__copyright__ = '2013, Alexander Schremmer <alex@alexanderweb.de>, Robert Riemann <robert@riemann.cc>'
|
|
|
|
import re
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
class TazRSSRecipe(BasicNewsRecipe):
    """Recipe settings for fetching taz.de ("die tageszeitung") via RSS.

    The class is purely declarative: it only assigns attributes on top of
    ``BasicNewsRecipe`` and defines no methods of its own.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Taz - die Tageszeitung'
    description = u'Taz.de - die tageszeitung (Anpassung von Robert)'
    __author__ = 'Alexander Schremmer, Robert Riemann'
    publisher = 'taz Entwicklungs GmbH & Co. Medien KG'
    category = 'news, Germany'
    publication_type = 'newspaper'
    language = 'de'
    lang = 'de-DE'
    timefmt = ' [%a, %d %b %Y]'

    # Reuses the publisher and locale strings defined just above, so this
    # assignment must stay after them.
    conversion_options = dict(publisher=publisher, language=lang)

    # --- Artwork ----------------------------------------------------------
    # Alternative masthead, currently disabled:
    # masthead_url = u'http://galeria-autonomica.de/wp-content/uploads/a_taz-logo.gif'
    masthead_url = u'http://upload.wikimedia.org/wikipedia/de/thumb/1/15/Die-Tageszeitung-Logo.svg/500px-Die-Tageszeitung-Logo.svg.png'

    # Use the cover presented on the homepage.
    cover_url = 'http://www.taz.de/digitaz/.s1jpeg320'

    # --- Feed selection ---------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    use_embedded_content = False

    # (section title, RSS URL) pairs, one per taz.de section.
    feeds = [
        (u'Schlagzeilen', u'http://www.taz.de/!p3270;rss/'),
        (u'Politik', u'http://www.taz.de/Politik/!p2;rss/'),
        (u'Zukunft', u'http://www.taz.de/Zukunft/!p4;rss/'),
        (u'Netz', u'http://www.taz.de/Netz/!p5;rss/'),
        (u'Debatte', u'http://www.taz.de/Debatte/!p9;rss/'),
        (u'Leben', u'http://www.taz.de/Leben/!p10;rss/'),
        (u'Sport', u'http://www.taz.de/Sport/!p12;rss/'),
        (u'Wahrheit', u'http://www.taz.de/Wahrheit/!p13;rss/'),
        (u'Berlin', u'http://www.taz.de/Berlin/!p14;rss/'),
        (u'Nord', u'http://www.taz.de/Nord/!p11;rss/'),
    ]

    # Omit articles already linked in the Schlagzeilen feed.
    ignore_duplicate_articles = {'title', 'url'}

    # --- Article clean-up -------------------------------------------------
    # The default value is False, but True makes the process much faster.
    no_stylesheets = True

    # Keep only <div> elements whose class attribute matches the
    # ``sect_article`` pattern.
    keep_only_tags = [
        {
            'name': ['div'],
            'attrs': {'class': re.compile(r".*\bsect_article\b.*")},
        },
    ]

    remove_tags = [
        {'name': ['div'], 'attrs': {'class': 'sectfoot'}},
        # Remove the taz paywall.
        {'name': ['div'], 'attrs': {'id': 'tzi_paywall'}},
    ]

    # Table-of-contents thumbnails are left disabled: with article pictures
    # the result is super-slow on Kindle.
    # def populate_article_metadata(self, article, soup, first):
    #     if first and hasattr(self, 'add_toc_thumbnail'):
    #         picdiv = soup.find('img')
    #         if picdiv is not None:
    #             self.add_toc_thumbnail(article,picdiv['src'])