diff --git a/recipes/taggeschau_de.recipe b/recipes/taggeschau_de.recipe index 9c0c60efca..ef6eb67db7 100644 --- a/recipes/taggeschau_de.recipe +++ b/recipes/taggeschau_de.recipe @@ -1,24 +1,41 @@ from calibre.web.feeds.news import BasicNewsRecipe +## History: +## 1: Base Version +## 2: Added rules for wdr.de, ndr.de, br-online.de +## 3: Added rules for rbb-online.de, boerse.ard.de, sportschau.de class Tagesschau(BasicNewsRecipe): title = 'Tagesschau' description = 'Nachrichten der ARD' publisher = 'ARD' language = 'de' + version = 3 - __author__ = 'Florian Andreas Pfaff' - oldest_article = 7 + __author__ = 'Florian Andreas Pfaff, a.peter' + oldest_article = 7 max_articles_per_feed = 100 - no_stylesheets = True + no_stylesheets = True + remove_javascript = True feeds = [('Tagesschau', 'http://www.tagesschau.de/xml/rss2')] remove_tags = [ - dict(name='div', attrs={'class':['linksZumThema schmal','teaserBox','boxMoreLinks','directLinks','teaserBox boxtext','fPlayer','zitatBox breit flashaudio']}), - dict(name='div', - attrs={'id':['socialBookmarks','seitenanfang']}), - dict(name='ul', - attrs={'class':['directLinks','directLinks weltatlas']}), - dict(name='strong', attrs={'class':['boxTitle inv','inv']}) + dict(name='div', attrs={'class':['linksZumThema schmal','teaserBox','boxMoreLinks','directLinks','teaserBox boxtext','fPlayer','zitatBox breit flashaudio','infobox ','footer clearfix','inner recommendations','teaser teaser-08 nachrichten smallstandard','infobox-rechts','infobox-links','csl2','teaserBox metaBlock','articleA archiveDisclaimer']}), + dict(name='div', attrs={'id':['pageFunctions']}), ## wdr.de + dict(name='div', attrs={'class':['chart','footerService','toplink','assetsLeft','assetsFullsize']}), ## boerse.ard.de + dict(name='div', attrs={'class':['ardMehrZumThemaLinks','socialBookmarks','ardContentEnd','ardDisclaimer']}), ## sportschau.de + dict(name='div', attrs={'id':['socialBookmarks','seitenanfang','comment']}), + dict(name='ul', attrs={'class':['directLinks','directLinks weltatlas','iconList','right']}), + dict(name='strong', attrs={'class':['boxTitle inv','inv']}), + dict(name='div', attrs={'class':['moreInfo right','moreInfo']}), + dict(name='span', attrs={'class':['videoLink']}), + dict(name='img', attrs={'class':['zoom float_right']}), + dict(name='a', attrs={'id':['zoom']}) ] - keep_only_tags = [dict(name='div', attrs={'id':'centerCol'})] + keep_only_tags = [dict(name='div', attrs={'id':'centerCol'}), + dict(name='div', attrs={'id':['mainColumn','ardContent']}), + dict(name='div', attrs={'class':['narrow clearfix','beitrag','detail_inlay','containerArticle noBorder','span-8']})] + + def get_masthead_url(self): + return 'http://intern.tagesschau.de/html/img/image.jpg' +