#!/usr/bin/env python2
'''
fr-online.de
'''
from calibre.web.feeds.recipes import BasicNewsRecipe


def classes(classes):
    '''
    Build a tag spec that matches on CSS class names.

    ``classes`` is a string of class names separated by single spaces.
    The result is a dict of the form ``{'attrs': {'class': predicate}}``;
    the predicate is truthy when the value it receives is non-empty and
    shares at least one whitespace-separated token with the requested names.
    '''
    q = frozenset(classes.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})


class FR(BasicNewsRecipe):
    '''Recipe for Frankfurter Rundschau, built from the fr.de RSS feeds below.'''

    title = 'Frankfurter Rundschau'
    __author__ = 'Kovid Goyal'
    description = 'Nachrichten aus D und aller Welt'
    language = 'de'
    publication_type = 'newspaper'
    use_embedded_content = False
    remove_javascript = True
    no_stylesheets = True
    oldest_article = 1  # Increase this number if you're interested in older articles
    max_articles_per_feed = 50  # Seems a reasonable number to me
    encoding = 'cp1252'

    # Only the main page container is kept; preprocess_html() relies on
    # this same id when it unwraps the container.
    keep_only_tags = [
        dict(id='fcms_page_main'),
    ]

    remove_tags = [
        dict(name='footer'),
        dict(id='comments'),
    ]

    # (section title, RSS URL) pairs.
    feeds = [
        ('Startseite', u'http://www.fr.de/?_XML=rss'),
        ('Frankfurt', u'https://www.fr.de/frankfurt/?_XML=rss'),
        ('Rhein-Main', 'https://www.fr.de/rhein-main/?_XML=rss'),
        ('Politik', 'https://www.fr.de/politik/?_XML=rss'),
        ('Wirtschaft', 'https://www.fr.de/wirtschaft/?_XML=rss'),
        ('Sport', 'https://www.fr.de/sport/?_XML=rss'),
        ('Eintracht Frankfurt', 'https://www.fr.de/sport/eintracht/?_XML=rss'),
        ('Kultur', 'https://www.fr.de/kultur/?_XML=rss'),
        ('Wissen', 'https://www.fr.de/wissen/?_XML=rss'),
        ('Leben', 'https://www.fr.de/leben/?_XML=rss'),
        ('Panorama', 'https://www.fr.de/panorama/?_XML=rss'),
    ]

    def preprocess_html(self, soup):
        '''
        Fix up lazily loaded images and unwrap the main page container.

        Every ``<img>`` that has a ``data-src`` attribute gets that value
        copied into ``src``. The element with id ``fcms_page_main`` is then
        replaced by its first child that has a truthy ``name`` attribute.
        The same ``soup`` object is returned, modified in place.
        '''
        for img in soup.findAll('img', attrs={'data-src': True}):
            img['src'] = img['data-src']

        main = soup.find(id='fcms_page_main')
        if main is None:
            # No such container on this page: iterating None would raise
            # TypeError, so leave the document as it is.
            return soup

        # Iterate over a snapshot, since replaceWith() mutates the tree.
        for tag in tuple(main):
            if getattr(tag, 'name', None):
                main.replaceWith(tag)
                break
        return soup