__license__ = 'GPL v3'
__copyright__ = '2009-2010, Darko Miletic '
''' www.nzz.ch '''

from calibre.web.feeds.recipes import BasicNewsRecipe


class Nzz(BasicNewsRecipe):
    """Calibre news recipe for NZZ Online (www.nzz.ch)."""

    # --- Publication metadata -------------------------------------------
    title = 'NZZ Online'
    __author__ = 'Darko Miletic'
    description = 'Laufend aktualisierte Nachrichten, Analysen und Hintergruende zu Politik, Wirtschaft, Kultur und Sport'
    publisher = 'NZZ AG'
    category = 'news, politics, nachrichten, Switzerland'
    language = 'de'

    # --- Fetch settings ---------------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False

    # Stylesheet applied to the generated articles. It is written as
    # adjacent literals purely for readability; they concatenate to one
    # string (leading and trailing space included).
    extra_css = (
        ' body{font-family: Georgia,"Times New Roman",Times,serif }'
        ' .artikel h3,.artikel h4,.bildLegende,.question,.autor'
        '{font-family: Arial,Verdana,Helvetica,sans-serif}'
        ' .bildLegende{font-size: small}'
        ' .autor{font-size: 0.9375em; color: #666666}'
        ' .quote{font-size: large !important; font-style: italic;'
        ' font-weight: normal !important;'
        ' border-bottom: 1px dotted #BFBFBF;'
        ' border-top: 1px dotted #BFBFBF; line-height: 1.25em}'
        ' .quelle{color: #666666; font-style: italic; white-space: nowrap} '
    )

    # Conversion metadata, reusing the attributes defined above.
    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # --- Page clean-up rules ---------------------------------------------
    keep_only_tags = [dict(name='div', attrs={'class': 'zone'})]
    remove_tags_before = dict(name='p', attrs={'class': 'dachzeile'})
    remove_tags_after = dict(name='p', attrs={'class': 'fussnote'})
    remove_attributes = ['width', 'height', 'lang']
    remove_tags = [
        dict(name=['object', 'link', 'base', 'meta', 'iframe']),
        dict(attrs={'id': 'content_rectangle_1'}),
        dict(attrs={'class': ['weiterfuehrendeLinks', 'fussnote', 'video']}),
    ]

    # --- RSS feeds as (section title, URL) pairs -------------------------
    feeds = [
        (u'International', u'http://www.nzz.ch/nachrichten/international?rss=true'),
        (u'Schweiz', u'http://www.nzz.ch/nachrichten/schweiz?rss=true'),
        (u'Wirtschaft', u'http://www.nzz.ch/nachrichten/wirtschaft/aktuell?rss=true'),
        (u'Finanzmaerkte', u'http://www.nzz.ch/finanzen/nachrichten?rss=true'),
        (u'Zuerich', u'http://www.nzz.ch/nachrichten/zuerich?rss=true'),
        (u'Sport', u'http://www.nzz.ch/nachrichten/sport?rss=true'),
        (u'Panorama', u'http://www.nzz.ch/nachrichten/panorama?rss=true'),
        (u'Kultur', u'http://www.nzz.ch/nachrichten/kultur/aktuell?rss=true'),
        (u'Wissenschaft', u'http://www.nzz.ch/nachrichten/wissenschaft?rss=true'),
        (u'Medien', u'http://www.nzz.ch/nachrichten/medien?rss=true'),
        (u'Reisen', u'http://www.nzz.ch/magazin/reisen?rss=true'),
    ]

    def preprocess_html(self, soup):
        """Drop inline ``style`` attributes, then delegate to ``adeify_images``.

        Every element in ``soup`` that carries a ``style`` attribute has that
        attribute deleted. The result of ``self.adeify_images(soup)`` is
        returned unchanged.
        """
        styled_tags = soup.findAll(style=True)
        for tag in styled_tags:
            del tag['style']
        return self.adeify_images(soup)