From 81182e7ae87757185c5fd1a853ac48d4ea1701af Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 15 May 2016 07:58:38 +0530 Subject: [PATCH] Update Die Presse --- recipes/diepresse.recipe | 84 ++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 47 deletions(-) diff --git a/recipes/diepresse.recipe b/recipes/diepresse.recipe index 3e2523bd62..4adfb8b8b6 100644 --- a/recipes/diepresse.recipe +++ b/recipes/diepresse.recipe @@ -1,5 +1,6 @@ -# -*- coding: utf-8 -*- - +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +from __future__ import unicode_literals, division, absolute_import, print_function __license__ = 'GPL v3' __copyright__ = '2009, Gerhard Aigner ' @@ -10,64 +11,53 @@ import re from calibre.web.feeds.news import BasicNewsRecipe class DiePresseRecipe(BasicNewsRecipe): - title = u'diePresse' + title = 'Die Presse' __author__ = 'Gerhard Aigner' - description = u'DiePresse.com - Die Online-Ausgabe der Österreichischen Tageszeitung Die Presse.' - publisher ='DiePresse.com' + description = 'DiePresse.com - Die Online-Ausgabe der Österreichischen Tageszeitung Die Presse.' + publisher = 'Die Presse Verlags-Gesellschaft m.b.H. 
& Co KG' + language = 'de_AT' category = 'news, politics, nachrichten, Austria' use_embedded_content = False remove_empty_feeds = True - lang = 'de-AT' no_stylesheets = True - encoding = 'ISO-8859-1' - language = 'de' recursions = 0 oldest_article = 1 max_articles_per_feed = 100 - - html2lrf_options = [ - '--comment' , description - , '--category' , category - , '--publisher', publisher - ] - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - preprocess_regexps = [ - (re.compile(r'Textversion', re.DOTALL), lambda match: ''), + (re.compile(r'Textversion', re.DOTALL), lambda match: ''), ] - + remove_tags = [dict(name='hr'), - dict(name='br'), - dict(name='small'), - dict(name='img'), - dict(name='div', attrs={'class':'textnavi'}), - dict(name='h1', attrs={'class':'titel'}), - dict(name='a', attrs={'class':'print'}), - dict(name='div', attrs={'class':'hline'})] - - feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'), - (u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'), - (u'Europa', u'http://diepresse.com/rss/EU'), - (u'Panorama', u'http://diepresse.com/rss/Panorama'), - (u'Sport', u'http://diepresse.com/rss/Sport'), - (u'Kultur', u'http://diepresse.com/rss/Kultur'), - (u'Leben', u'http://diepresse.com/rss/Leben'), - (u'Tech', u'http://diepresse.com/rss/Tech'), - (u'Wissenschaft', u'http://diepresse.com/rss/Science'), - (u'Bildung', u'http://diepresse.com/rss/Bildung'), - (u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'), - (u'Recht', u'http://diepresse.com/rss/Recht'), - (u'Spectrum', u'http://diepresse.com/rss/Spectrum'), - (u'Meinung', u'http://diepresse.com/rss/Meinung')] + dict(name='br'), + dict(name='small'), + dict(name='img'), + dict(name='div', attrs={'class':'textnavi'}), + dict(name='h1', attrs={'class':'titel'}), + dict(name='a', attrs={'class':'print'}), + dict(name='div', attrs={'class':'hline'})] + + remove_tags_after = [ + dict(name='div', attrs={'class':'articletext'}) + ] + 
+ feeds = [ + ('Politik', 'http://diepresse.com/rss/Politik'), + ('Wirtschaft', 'http://diepresse.com/rss/Wirtschaft'), + ('Europa', 'http://diepresse.com/rss/EU'), + ('Panorama', 'http://diepresse.com/rss/Panorama'), + ('Sport', 'http://diepresse.com/rss/Sport'), + ('Kultur', 'http://diepresse.com/rss/Kultur'), + ('Leben', 'http://diepresse.com/rss/Leben'), + ('Tech', 'http://diepresse.com/rss/Tech'), + ('Wissenschaft', 'http://diepresse.com/rss/Science'), + ('Bildung', 'http://diepresse.com/rss/Bildung'), + ('Gesundheit', 'http://diepresse.com/rss/Gesundheit'), + ('Recht', 'http://diepresse.com/rss/Recht'), + ('Spectrum', 'http://diepresse.com/rss/Spectrum'), + ('Meinung', 'http://diepresse.com/rss/Meinung') + ] def print_version(self, url): return url.replace('home','text/home') - - def preprocess_html(self, soup): - soup.html['xml:lang'] = self.lang - soup.html['lang'] = self.lang - mtag = '' - soup.head.insert(0,mtag) - return soup \ No newline at end of file