#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import unicode_literals, division, absolute_import, print_function

'''
focus.de
'''

from calibre.web.feeds.news import BasicNewsRecipe


class AdvancedUserRecipe1305567197(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from the Focus.de RSS feeds.

    The class only configures the ``BasicNewsRecipe`` machinery through
    class attributes and overrides two hooks: ``print_version`` (URL
    rewriting) and ``preprocess_html`` (clean-up of the parsed article).
    """

    title = 'Focus (DE)'
    __author__ = 'Anonymous'
    description = 'RSS-Feeds von Focus.de'
    language = 'de'

    # Download limits and generic clean-up switches; their exact semantics
    # are defined by BasicNewsRecipe (see the calibre recipe API docs).
    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True
    use_embedded_content = False
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}

    # (section title, feed URL) pairs.
    feeds = [
        ('Politik', 'http://rss.focus.de/politik/'),
        ('Finanzen', 'http://rss.focus.de/finanzen/'),
        ('Gesundheit', 'http://rss.focus.de/gesundheit/'),
        ('Panorama', 'http://rss.focus.de/panorama/'),
        ('Digital', 'http://rss.focus.de/digital/'),
        ('Reisen', 'http://rss.focus.de/reisen/'),
    ]

    # Keep only the <div id="article"> element of each page ...
    keep_only_tags = [
        dict(name='div', attrs={'id': 'article'}),
    ]

    # ... and drop these <div> classes from what was kept.
    remove_tags = [
        dict(name='div', attrs={'class': ['inimagebuttons', 'kolumneHead clearfix']}),
    ]

    remove_attributes = ['width', 'height']

    # A triple-quoted literal is used instead of backslash continuations so
    # that no stray backslash can end up inside the stylesheet text.
    extra_css = '''
        h1 {font-size: 1.6em; text-align: left; margin-top: 0em}
        h2 {font-size: 1em; text-align: left}
        .overhead {margin-bottom: 0em}
        .caption {font-size: 0.6em}
    '''

    # Upper-cased headline prefixes that mark an <h2> as a video teaser.
    _VIDEO_PREFIXES = ('IM VIDEO:', 'VIDEO:')

    def print_version(self, url):
        """Return *url* with the ``drucken=1`` query parameter appended.

        NOTE(review): "drucken" is German for "print"; presumably this
        selects the site's print-friendly page - confirm against the site.

        The parameter is joined with ``&`` when *url* already carries a
        query string, and with ``?`` otherwise, so the result is always a
        well-formed URL.
        """
        separator = '&' if '?' in url else '?'
        return url + separator + 'drucken=1'

    def preprocess_html(self, soup):
        """Remove ``<h2>`` headings that merely point to a video.

        A heading is removed when its text (compared case-insensitively)
        starts with ``IM VIDEO:`` or ``VIDEO:``. Headings without a single
        string child (``item.string`` is falsy) are left untouched.

        Returns the same *soup* object, modified in place.
        """
        for heading in soup.findAll('h2'):
            text = heading.string
            if not text:
                continue
            # One startswith() call with a tuple replaces the chained `or`.
            if text.upper().startswith(self._VIDEO_PREFIXES):
                heading.extract()
        return soup