import re

from calibre.web.feeds.news import BasicNewsRecipe

# History:
# 1: Base Version
# 2: Added rules for wdr.de, ndr.de, br-online.de
# 3: Added rules for rbb-online.de, boerse.ard.de, sportschau.de
# 4: New design of tagesschau.de implemented. Simplified.
# 5: Taken out the pictures.


class Tagesschau(BasicNewsRecipe):
    """Calibre news recipe for the tagesschau.de RSS feed (ARD news, German).

    Articles are taken from a single RSS feed; links pointing into the
    ``/multimedia/`` section of the site are filtered out by
    ``get_article_url``.
    """

    title = 'Tagesschau'
    description = 'Nachrichten der ARD'
    publisher = 'ARD'
    language = 'de'
    version = 5  # matches the latest entry in the history list above
    __author__ = 'Florian Andreas Pfaff, a.peter'

    oldest_article = 7
    max_articles_per_feed = 100
    no_stylesheets = True
    remove_javascript = True

    feeds = [('Tagesschau', 'http://www.tagesschau.de/xml/rss2')]

    # Elements with these class values are stripped from the article pages.
    remove_tags = [
        dict(attrs={'class': [
            'socialMedia',
            'mediaLink',
            'mediaInfo',
            'linklist teaserImTeaser',
            'modCon modConComments',
        ]}),
    ]

    # Only <div> elements matching one of these rules are kept.
    keep_only_tags = [
        dict(name='div', attrs={
            'class': ['section sectionZ', 'section sectionZ sectionArticle']}),
        dict(name='div', attrs={'class': re.compile(r'.*containerArticle.*')}),
    ]

    def get_article_url(self, article):
        """Return the URL for ``article``, dropping multimedia links.

        The URL is resolved by ``BasicNewsRecipe.get_article_url``. If it
        contains ``/multimedia/``, ``None`` is returned instead (presumably
        so that calibre skips the entry -- confirm against calibre's feed
        handling). A missing/empty URL from the base class is passed through
        unchanged rather than being tested for the substring, which would
        raise ``TypeError`` on ``None``.
        """
        ans = BasicNewsRecipe.get_article_url(self, article)
        # Guard on truthiness first: the base implementation may yield None
        # when the feed entry carries no usable link.
        if ans and '/multimedia/' in ans:
            return None
        return ans