__license__ = 'GPL v3'
__copyright__ = '2013, Alexander Schremmer , Robert Riemann '

import re

from calibre.web.feeds.news import BasicNewsRecipe


class TazRSSRecipe(BasicNewsRecipe):
    """Calibre news recipe for the German newspaper "taz - die tageszeitung".

    The recipe is purely declarative: it lists the taz.de RSS feeds to
    download and the tag filters applied to each fetched article page.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Taz - die Tageszeitung'
    description = u'Taz.de - die tageszeitung (Anpassung von Robert)'
    __author__ = 'Alexander Schremmer, Robert Riemann'
    publisher = 'taz Entwicklungs GmbH & Co. Medien KG'
    category = 'news, Germany'
    publication_type = 'newspaper'
    language = 'de'
    lang = 'de-DE'
    timefmt = ' [%a, %d %b %Y]'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    use_embedded_content = False

    # Previously used masthead, kept for reference:
    # masthead_url = u'http://galeria-autonomica.de/wp-content/uploads/a_taz-logo.gif'
    masthead_url = u'http://upload.wikimedia.org/wikipedia/de/thumb/1/15/Die-Tageszeitung-Logo.svg/500px-Die-Tageszeitung-Logo.svg.png'

    conversion_options = {
        'publisher': publisher,
        'language': lang,
    }

    # (section title, RSS URL) pairs, in the order given here.
    feeds = [
        (u'Schlagzeilen', u'http://www.taz.de/!p3270;rss/'),
        (u'Politik', u'http://www.taz.de/Politik/!p2;rss/'),
        (u'Zukunft', u'http://www.taz.de/Zukunft/!p4;rss/'),
        (u'Netz', u'http://www.taz.de/Netz/!p5;rss/'),
        (u'Debatte', u'http://www.taz.de/Debatte/!p9;rss/'),
        (u'Leben', u'http://www.taz.de/Leben/!p10;rss/'),
        (u'Sport', u'http://www.taz.de/Sport/!p12;rss/'),
        (u'Wahrheit', u'http://www.taz.de/Wahrheit/!p13;rss/'),
        (u'Berlin', u'http://www.taz.de/Berlin/!p14;rss/'),
        (u'Nord', u'http://www.taz.de/Nord/!p11;rss/'),
    ]

    # Omit articles that are already linked in the Schlagzeilen feed.
    ignore_duplicate_articles = {'title', 'url'}

    # Use the cover image presented on the taz homepage.
    cover_url = 'http://www.taz.de/digitaz/.s1jpeg320'

    # The default is False; True makes the process much faster.
    no_stylesheets = True

    # --- Article clean-up -----------------------------------------------
    # Keep only <div> elements whose class attribute contains the whole
    # word "sect_article".
    keep_only_tags = [
        {
            'name': ['div'],
            'attrs': {'class': re.compile(r".*\bsect_article\b.*")},
        },
    ]

    remove_tags = [
        {'name': ['div'], 'attrs': {'class': 'sectfoot'}},
        # Remove the taz paywall.
        {'name': ['div'], 'attrs': {'id': 'tzi_paywall'}},
    ]

    # Disabled: with article pictures the result is super-slow on Kindle.
    # def populate_article_metadata(self, article, soup, first):
    #     if first and hasattr(self, 'add_toc_thumbnail'):
    #         picdiv = soup.find('img')
    #         if picdiv is not None:
    #             self.add_toc_thumbnail(article,picdiv['src'])