Update Tagesschau

Fixes #1436196 [epaper from tagesschau.de shows pages of icons](https://bugs.launchpad.net/calibre/+bug/1436196)
This commit is contained in:
Kovid Goyal 2015-03-25 17:03:45 +05:30
parent d4a42179e2
commit da8a8781f7

View File

@ -23,21 +23,16 @@ class Tagesschau(BasicNewsRecipe):
feeds = [('Tagesschau', 'http://www.tagesschau.de/xml/rss2')]
remove_tags = [
dict(name='div', attrs={'class':['inline']}),
dict(name='div', attrs={'class':re.compile(r'.*linklist.*')}),
dict(name='div', attrs={'class':re.compile(r'.*media(Left|Right|Top|Bottom|Info).*')}),
dict(name='div', attrs={'class':re.compile(r'.*mod(Socialbar|ConComments).*')}),
dict(name='div', attrs={'class':re.compile(r'.*infokasten.*')}),
dict(name='div', attrs={'class':re.compile(r'.*articlePictureA.*')}),
dict(name='div', attrs={'id': re.compile(r'.*zoomTeaser.*')}),
dict(name='h2', attrs={'class':re.compile(r'.*conHeadline.*')}),
dict(name='ul', attrs={'class':['iconList']})
dict(attrs={'class':['socialMedia', 'mediaLink', 'mediaInfo', 'linklist teaserImTeaser', 'modCon modConComments']}),
]
keep_only_tags = [
dict(name='div', attrs={'class':['section sectionZ sectionArticle']}),
dict(name='div', attrs={'class':['section sectionZ', 'section sectionZ sectionArticle']}),
dict(name='div', attrs={'class':re.compile(r'.*containerArticle.*')})
]
def get_masthead_url(self):
    """Return the fixed image URL this recipe uses as its masthead."""
    masthead = 'http://intern.tagesschau.de/html/img/image.jpg'
    return masthead
def get_article_url(self, article):
    """Return the URL for *article*, or None for multimedia entries.

    The URL is resolved by ``BasicNewsRecipe.get_article_url``. Any URL
    containing ``/multimedia/`` is replaced by None (presumably so the
    framework skips the entry -- confirm against the calibre recipe API).
    """
    ans = BasicNewsRecipe.get_article_url(self, article)
    # The base lookup may come back without a URL; check for that before
    # the substring test so a missing link does not raise TypeError.
    if ans and '/multimedia/' in ans:
        ans = None
    return ans