NOTE(review): this patch was reconstructed from a copy whose line breaks had
been collapsed into spaces. Record order and text are preserved, and the
restored hunk matches its header (48 old lines, 60 new lines). Leading
indentation, the position of the three blank context lines, and the spacing
inside the continued extra_css string are inferred, not recovered -- confirm
against the upstream commit. Applying it may require a whitespace-tolerant
mode (e.g. `patch -l`).

diff --git a/recipes/focus_de.recipe b/recipes/focus_de.recipe
index d0b0f4aef8..7fbaa88623 100644
--- a/recipes/focus_de.recipe
+++ b/recipes/focus_de.recipe
@@ -1,48 +1,60 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+'''
+focus.de
+'''
+
 from calibre.web.feeds.news import BasicNewsRecipe
 
 class AdvancedUserRecipe1305567197(BasicNewsRecipe):
-    title = u'Focus (DE)'
+    title = 'Focus (DE)'
     __author__ = 'Anonymous'
-    language = 'de'
-    oldest_article = 7
-    max_articles_per_feed = 100
-    no_stylesheets = True
-    use_embedded_content = False
-    remove_javascript = True
+    description = 'RSS-Feeds von Focus.de'
+    language = 'de'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+    remove_javascript = True
+    use_embedded_content = False
+    remove_empty_feeds = True
+    ignore_duplicate_articles = {'title', 'url'}
+
+    feeds = [
+        ('Politik', 'http://rss.focus.de/politik/'),
+        ('Finanzen', 'http://rss.focus.de/finanzen/'),
+        ('Gesundheit', 'http://rss.focus.de/gesundheit/'),
+        ('Panorama', 'http://rss.focus.de/panorama/'),
+        ('Digital', 'http://rss.focus.de/digital/'),
+        ('Reisen', 'http://rss.focus.de/reisen/')
+    ]
+
+    keep_only_tags = [
+        dict(name='div', attrs={'id':'article'})
+    ]
+
+    remove_tags = [
+        dict(name='div', attrs={'class':['inimagebuttons',
+                                         'kolumneHead clearfix']})
+    ]
+
+    remove_attributes = ['width', 'height']
+
+    extra_css = 'h1 {font-size: 1.6em; text-align: left; margin-top: 0em} \
+                 h2 {font-size: 1em; text-align: left} \
+                 .overhead {margin-bottom: 0em} \
+                 .caption {font-size: 0.6em}'
 
     def print_version(self, url):
         return url + '?drucken=1'
 
-    keep_only_tags = [
-        dict(name='div', attrs={'id':['article']}) ]
-
-    remove_tags = [dict(name='div', attrs={'class':'sidebar'}),
-                   dict(name='div', attrs={'class':'commentForm'}),
-                   dict(name='div', attrs={'class':'comment clearfix oid-3534591 open'}),
-                   dict(name='div', attrs={'class':'similarityBlock'}),
-                   dict(name='div', attrs={'class':'footer'}),
-                   dict(name='div', attrs={'class':'getMoreComments'}),
-                   dict(name='div', attrs={'class':'moreComments'}),
-                   dict(name='div', attrs={'class':'ads'}),
-                   dict(name='div', attrs={'class':'articleContent'}),
-
-
-                   ]
-    remove_tags_after = [
-        dict(name='div',attrs={'class':['commentForm','title', 'actions clearfix']})
-    ]
-
-
-    feeds = [ (u'Eilmeldungen', u'http://rss2.focus.de/c/32191/f/533875/index.rss'),
-              (u'Auto-News', u'http://rss2.focus.de/c/32191/f/443320/index.rss'),
-              (u'Digital-News', u'http://rss2.focus.de/c/32191/f/443315/index.rss'),
-              (u'Finanzen-News', u'http://rss2.focus.de/c/32191/f/443317/index.rss'),
-              (u'Gesundheit-News', u'http://rss2.focus.de/c/32191/f/443314/index.rss'),
-              (u'Immobilien-News', u'http://rss2.focus.de/c/32191/f/443318/index.rss'),
-              (u'Kultur-News', u'http://rss2.focus.de/c/32191/f/443321/index.rss'),
-              (u'Panorama-News', u'http://rss2.focus.de/c/32191/f/533877/index.rss'),
-              (u'Politik-News', u'http://rss2.focus.de/c/32191/f/443313/index.rss'),
-              (u'Reisen-News', u'http://rss2.focus.de/c/32191/f/443316/index.rss'),
-              (u'Sport-News', u'http://rss2.focus.de/c/32191/f/443319/index.rss'),
-              (u'Wissen-News', u'http://rss2.focus.de/c/32191/f/533876/index.rss'),
-    ]
+    def preprocess_html(self, soup):
+        # remove useless references to videos
+        for item in soup.findAll('h2'):
+            if item.string:
+                txt = item.string.upper()
+                if txt.startswith('IM VIDEO:') or txt.startswith('VIDEO:'):
+                    item.extract()
+        return soup