Updated taggeschau.de

This commit is contained in:
Kovid Goyal 2012-02-10 09:21:21 +05:30
parent 4f015c5efa
commit 8f3185872b

View File

@ -1,24 +1,41 @@
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
## History:
## 1: Base Version
## 2: Added rules for wdr.de, ndr.de, br-online.de
## 3: Added rules for rbb-online.de, boerse.ard.de, sportschau.de
class Tagesschau(BasicNewsRecipe): class Tagesschau(BasicNewsRecipe):
title = 'Tagesschau' title = 'Tagesschau'
description = 'Nachrichten der ARD' description = 'Nachrichten der ARD'
publisher = 'ARD' publisher = 'ARD'
language = 'de' language = 'de'
version = 3
__author__ = 'Florian Andreas Pfaff' __author__ = 'Florian Andreas Pfaff, a.peter'
oldest_article = 7 oldest_article = 7
max_articles_per_feed = 100 max_articles_per_feed = 100
no_stylesheets = True no_stylesheets = True
remove_javascript = True
feeds = [('Tagesschau', 'http://www.tagesschau.de/xml/rss2')] feeds = [('Tagesschau', 'http://www.tagesschau.de/xml/rss2')]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':['linksZumThema schmal','teaserBox','boxMoreLinks','directLinks','teaserBox boxtext','fPlayer','zitatBox breit flashaudio']}), dict(name='div', attrs={'class':['linksZumThema schmal','teaserBox','boxMoreLinks','directLinks','teaserBox boxtext','fPlayer','zitatBox breit flashaudio','infobox ','footer clearfix','inner recommendations','teaser teaser-08 nachrichten smallstandard','infobox-rechts','infobox-links','csl2','teaserBox metaBlock','articleA archiveDisclaimer']}),
dict(name='div', dict(name='div', attrs={'id':['pageFunctions']}), ## wdr.de
attrs={'id':['socialBookmarks','seitenanfang']}), dict(name='div', attrs={'class':['chart','footerService','toplink','assetsLeft','assetsFullsize']}), ## boerse.ard.de
dict(name='ul', dict(name='div', attrs={'class':['ardMehrZumThemaLinks','socialBookmarks','ardContentEnd','ardDisclaimer']}), ## sportschau.de
attrs={'class':['directLinks','directLinks weltatlas']}), dict(name='div', attrs={'id':['socialBookmarks','seitenanfang','comment']}),
dict(name='strong', attrs={'class':['boxTitle inv','inv']}) dict(name='ul', attrs={'class':['directLinks','directLinks weltatlas','iconList','right']}),
dict(name='strong', attrs={'class':['boxTitle inv','inv']}),
dict(name='div', attrs={'class':['moreInfo right','moreInfo']}),
dict(name='span', attrs={'class':['videoLink']}),
dict(name='img', attrs={'class':['zoom float_right']}),
dict(name='a', attrs={'id':['zoom']})
] ]
keep_only_tags = [dict(name='div', attrs={'id':'centerCol'})] keep_only_tags = [dict(name='div', attrs={'id':'centerCol'}),
dict(name='div', attrs={'id':['mainColumn','ardContent']}),
dict(name='div', attrs={'class':['narrow clearfix','beitrag','detail_inlay','containerArticle noBorder','span-8']})]
def get_masthead_url(self):
return 'http://intern.tagesschau.de/html/img/image.jpg'