From f3af580d5e90280fc8e89ef7db72433a714ce717 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 24 Jul 2013 12:16:08 +0530 Subject: [PATCH] Update taz.de (RSS) --- recipes/taz_rss.recipe | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/recipes/taz_rss.recipe b/recipes/taz_rss.recipe index d4fc0237da..9f308d9739 100644 --- a/recipes/taz_rss.recipe +++ b/recipes/taz_rss.recipe @@ -1,4 +1,3 @@ - __license__ = 'GPL v3' __copyright__ = '2013, Alexander Schremmer , Robert Riemann ' @@ -43,15 +42,14 @@ class TazRSSRecipe(BasicNewsRecipe): # use the cover presented on the homepage cover_url = 'http://www.taz.de/digitaz/.s1jpeg320' - keep_only_tags = [dict(name='div', attrs={'class': 'sect sect_article'})] + no_stylesheets = True # default value is False, but True makes process much faster + keep_only_tags = [ + dict(name=['div'], attrs={'class': re.compile(r".*\bsect_article\b.*")}) + ] remove_tags = [ - dict(name=['div'], attrs={'class': 'artikelwerbung'}), - dict(name=['ul'], attrs={'class': 'toolbar'}), + dict(name=['div'], attrs={'class': 'sectfoot'}), # remove: taz paywall - dict(name=['div'], attrs={'id': 'tzi_paywall'}), - # remove: Artikel zum Thema (not working on Kindle) - dict(name=['div'], attrs={'class': re.compile(r".*\bsect_seealso\b.*")}), - dict(name=['div'], attrs={'class': 'sectfoot'}) + dict(name=['div'], attrs={'id': 'tzi_paywall'}) ] # with article pictures on Kindle super-slow