From aba1097e1040dc5b09a259ad3356184cba3bee0e Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 8 Jan 2016 20:24:03 +0530 Subject: [PATCH] Update Tagesanzeiger --- recipes/tagesan.recipe | 82 ++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 38 deletions(-) diff --git a/recipes/tagesan.recipe b/recipes/tagesan.recipe index aac064645f..f80f337c2b 100644 --- a/recipes/tagesan.recipe +++ b/recipes/tagesan.recipe @@ -1,45 +1,51 @@ from calibre.web.feeds.news import BasicNewsRecipe -class AdvancedUserRecipe1284927619(BasicNewsRecipe): - title = u'Tagesanzeiger' - publisher = u'Tamedia AG' - oldest_article = 2 - __author__ = 'noxxx' - max_articles_per_feed = 100 - description = 'tagesanzeiger.ch: Nichts verpassen' - category = 'News, Politik, Nachrichten, Schweiz, Zuerich' - language = 'de' - conversion_options = { - 'comments' : description - ,'tags' : category - ,'language' : language - ,'publisher' : publisher +class AdvancedUserRecipe1450864843(BasicNewsRecipe): + title = u'Tagesanzeiger' + __author__ = 'noxxx' + language = 'de' + oldest_article = 1 + max_articles_per_feed = 8 + no_stylesheets = True + auto_cleanup = True + scale_news_images_to_device = True + compress_news_images = True + remove_javascript = True + encoding = 'utf-8' + remove_empty_feeds = True + ignore_duplicate_articles = {'title', 'url'} + + feeds = [ + (u'Front', u'http://www.tagesanzeiger.ch/rss.html'), + (u'Zurich', u'http://www.tagesanzeiger.ch/zuerich/rss.html'), + (u'Schweiz', u'http://www.tagesanzeiger.ch/schweiz/rss.html'), + (u'International', u'http://www.tagesanzeiger.ch/ausland/rss.html'), + (u'Wirtschaft', u'http://www.tagesanzeiger.ch/wirtschaft/rss.html'), + (u'Sport', u'http://www.tagesanzeiger.ch/sport/rss.html'), + (u'Kultur', u'http://www.tagesanzeiger.ch/kultur/rss.html'), + (u'Leben', u'http://www.tagesanzeiger.ch/leben/rss.html'), + (u'Panorama', u'http://www.tagesanzeiger.ch/panorama/rss.html'), + (u'Digital', u'http://www.tagesanzeiger.ch/digital/rss.html') + ] + + keep_only_tags = [dict(name='h1'), + dict(name='h3'), + dict(name='div', attrs={'id': 'topElement'}), + dict(name='div', attrs={'id': 'mainContent'}) + ] + + auto_cleanup_keep = {'//div[@id="article-image"]', + '//*[@class="standard"]' } - remove_tags = [ - dict(name='img') - ,dict(name='div',attrs={'class':['swissquote ad','boxNews','centerAD','contentTabs2','sbsLabel']}) - ,dict(name='div',attrs={'id':['colRightAd','singleRight','singleSmallRight','MailInfo','metaLine','sidebarSky','contentFooter','commentInfo','commentInfo2','commentInfo3','footerBottom','clear','boxExclusiv','singleLogo','navSearch','headerLogin','headerBottomRight','horizontalNavigation','subnavigation','googleAdSense','footerAd','contentbox','articleGalleryNav']}) - ,dict(name='form',attrs={'id':['articleMailForm','commentform']}) - ,dict(name='div',attrs={'style':['position:absolute']}) - ,dict(name='script',attrs={'type':['text/javascript']}) - ,dict(name='p',attrs={'class':['schreiben','smallPrint','charCounter','caption']}) - ] - feeds = [ - (u'Front', u'http://www.tagesanzeiger.ch/rss.html') - ,(u'Zürich', u'http://www.tagesanzeiger.ch/zuerich/rss.html') - ,(u'Schweiz', u'http://www.tagesanzeiger.ch/schweiz/rss.html') - ,(u'Ausland', u'http://www.tagesanzeiger.ch/ausland/rss.html') - ,(u'Digital', u'http://www.tagesanzeiger.ch/digital/rss.html') - ,(u'Wissen', u'http://www.tagesanzeiger.ch/wissen/rss.html') - ,(u'Panorama', u'http://www.tagesanzeiger.ch/panorama/rss.html') - ,(u'Wirtschaft', u'http://www.tagesanzeiger.ch/wirtschaft/rss.html') - ,(u'Sport', u'http://www.tagesanzeiger.ch/sport/rss.html') - ,(u'Kultur', u'http://www.tagesanzeiger.ch/kultur/rss.html') - ,(u'Leben', u'http://www.tagesanzeiger.ch/leben/rss.html') - ,(u'Auto', u'http://www.tagesanzeiger.ch/auto/rss.html')] + conversion_options = {'base_font_size': 12} - def print_version(self, url): - return url + '/print.html' + extra_css = 'h1 { font-size: 150%; font-weight: bold; } \ + h2 { font-size: 120%; font-weight: bold; } \ + h3 { font-size: 100%; font-style: italic; } \ + h4, h5, h6 { font-size: 100%; font-style: italic; }' + def get_cover_url(self): + self.cover_url = 'http://newsmail.cleartext.ch/referenz_tagesanzeiger.jpg' + return getattr(self, 'cover_url', None)