# -*- coding: utf-8 -*- __license__ = 'GPL v3' __copyright__ = '2009, Gerhard Aigner ' ''' http://www.diepresse.at - Austrian Newspaper ''' import re from calibre.web.feeds.news import BasicNewsRecipe class DiePresseRecipe(BasicNewsRecipe): title = u'diePresse' __author__ = 'Gerhard Aigner' description = u'DiePresse.com - Die Online-Ausgabe der Österreichischen Tageszeitung Die Presse.' publisher ='DiePresse.com' category = 'news, politics, nachrichten, Austria' use_embedded_content = False remove_empty_feeds = True lang = 'de-AT' no_stylesheets = True encoding = 'ISO-8859-1' language = 'de' recursions = 0 oldest_article = 1 max_articles_per_feed = 100 html2lrf_options = [ '--comment' , description , '--category' , category , '--publisher', publisher ] html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' preprocess_regexps = [ (re.compile(r'Textversion', re.DOTALL), lambda match: ''), ] remove_tags = [dict(name='hr'), dict(name='br'), dict(name='small'), dict(name='img'), dict(name='div', attrs={'class':'textnavi'}), dict(name='h1', attrs={'class':'titel'}), dict(name='a', attrs={'class':'print'}), dict(name='div', attrs={'class':'hline'})] feeds = [(u'Politik', u'http://diepresse.com/rss/Politik'), (u'Wirtschaft', u'http://diepresse.com/rss/Wirtschaft'), (u'Europa', u'http://diepresse.com/rss/EU'), (u'Panorama', u'http://diepresse.com/rss/Panorama'), (u'Sport', u'http://diepresse.com/rss/Sport'), (u'Kultur', u'http://diepresse.com/rss/Kultur'), (u'Leben', u'http://diepresse.com/rss/Leben'), (u'Tech', u'http://diepresse.com/rss/Tech'), (u'Wissenschaft', u'http://diepresse.com/rss/Science'), (u'Bildung', u'http://diepresse.com/rss/Bildung'), (u'Gesundheit', u'http://diepresse.com/rss/Gesundheit'), (u'Recht', u'http://diepresse.com/rss/Recht'), (u'Spectrum', u'http://diepresse.com/rss/Spectrum'), (u'Meinung', u'http://diepresse.com/rss/Meinung')] def print_version(self, url): return url.replace('home','text/home') def preprocess_html(self, soup): soup.html['xml:lang'] = self.lang soup.html['lang'] = self.lang mtag = '' soup.head.insert(0,mtag) return soup