diff --git a/recipes/taz_rss.recipe b/recipes/taz_rss.recipe index 0535b6ef3a..d4fc0237da 100644 --- a/recipes/taz_rss.recipe +++ b/recipes/taz_rss.recipe @@ -1,3 +1,4 @@ + __license__ = 'GPL v3' __copyright__ = '2013, Alexander Schremmer , Robert Riemann ' @@ -6,10 +7,15 @@ from calibre.web.feeds.news import BasicNewsRecipe class TazRSSRecipe(BasicNewsRecipe): title = u'Taz - die Tageszeitung' - description = u'Taz.de - die tageszeitung' + description = u'Taz.de - die tageszeitung (Anpassung von Robert)' __author__ = 'Alexander Schremmer, Robert Riemann' language = 'de' lang = 'de-DE' + category = 'news, Germany' + timefmt = ' [%a, %d %b %Y]' + publication_type = 'newspaper' + remove_empty_feeds = True + use_embedded_content = False oldest_article = 7 max_articles_per_feed = 100 publisher = 'taz Entwicklungs GmbH & Co. Medien KG' @@ -20,24 +26,37 @@ class TazRSSRecipe(BasicNewsRecipe): 'language': lang, } feeds = [ - (u'Schlagzeilen', u'http://www.taz.de/!p3270;rss/'), - (u'Politik', u'http://www.taz.de/Politik/!p2;rss/'), - (u'Zukunft', u'http://www.taz.de/Zukunft/!p4;rss/'), - (u'Netz', u'http://www.taz.de/Netz/!p5;rss/'), - (u'Debatte', u'http://www.taz.de/Debatte/!p9;rss/'), - (u'Leben', u'http://www.taz.de/Leben/!p10;rss/'), - (u'Sport', u'http://www.taz.de/Sport/!p12;rss/'), - (u'Wahrheit', u'http://www.taz.de/Wahrheit/!p13;rss/'), - (u'Berlin', u'http://www.taz.de/Berlin/!p14;rss/'), - (u'Nord', u'http://www.taz.de/Nord/!p11;rss/') + (u'Schlagzeilen', u'http://www.taz.de/!p3270;rss/'), + (u'Politik', u'http://www.taz.de/Politik/!p2;rss/'), + (u'Zukunft', u'http://www.taz.de/Zukunft/!p4;rss/'), + (u'Netz', u'http://www.taz.de/Netz/!p5;rss/'), + (u'Debatte', u'http://www.taz.de/Debatte/!p9;rss/'), + (u'Leben', u'http://www.taz.de/Leben/!p10;rss/'), + (u'Sport', u'http://www.taz.de/Sport/!p12;rss/'), + (u'Wahrheit', u'http://www.taz.de/Wahrheit/!p13;rss/'), + (u'Berlin', u'http://www.taz.de/Berlin/!p14;rss/'), + (u'Nord', u'http://www.taz.de/Nord/!p11;rss/') ] + # omit articles already linked in Schlagzeilen feed + ignore_duplicate_articles = {'title', 'url'} + + # use the cover presented on the homepage + cover_url = 'http://www.taz.de/digitaz/.s1jpeg320' + keep_only_tags = [dict(name='div', attrs={'class': 'sect sect_article'})] remove_tags = [ - dict(name=['div'], attrs={'class': 'artikelwerbung'}), - dict(name=['ul'], attrs={'class': 'toolbar'}), - # remove: taz paywall - dict(name=['div'], attrs={'id': 'tzi_paywall'}), - # remove: Artikel zum Thema (not working on Kindle) - dict(name=['div'], attrs={'class': re.compile(r".*\bsect_seealso\b.*")}), - dict(name=['div'], attrs={'class': 'sectfoot'}) + dict(name=['div'], attrs={'class': 'artikelwerbung'}), + dict(name=['ul'], attrs={'class': 'toolbar'}), + # remove: taz paywall + dict(name=['div'], attrs={'id': 'tzi_paywall'}), + # remove: Artikel zum Thema (not working on Kindle) + dict(name=['div'], attrs={'class': re.compile(r".*\bsect_seealso\b.*")}), + dict(name=['div'], attrs={'class': 'sectfoot'}) ] + +# with article pictures on Kindle super-slow +# def populate_article_metadata(self, article, soup, first): +# if first and hasattr(self, 'add_toc_thumbnail'): +# picdiv = soup.find('img') +# if picdiv is not None: +# self.add_toc_thumbnail(article,picdiv['src'])