Update The Guardian

This commit is contained in:
Kovid Goyal 2014-12-06 07:57:26 +05:30
parent 54909e3a59
commit 30ebdfe1f3

View File

@@ -38,27 +38,29 @@ class Guardian(BasicNewsRecipe):
ignore_sections = [] ignore_sections = []
timefmt = ' [%a, %d %b %Y]' timefmt = ' [%a, %d %b %Y]'
keep_only_tags = [ keep_only_tags = [
dict(name='div', attrs={'id':["content","article_header","main-article-info",]}), dict(name='div', attrs={'id':["content","article_header","main-article-info",]}),
] dict(attrs={'class':lambda x: x and set(x.split()).intersection({'content__head', 'content__main'})}),
]
remove_tags = [ remove_tags = [
dict(name='div', attrs={'class':["video-content","videos-third-column"]}), dict(name='div', attrs={'class':[
dict(name='div', attrs={'id':["article-toolbox","subscribe-feeds",]}), "video-content","videos-third-column", 'meta__extras', 'submeta-container submeta-container--break-at-leftcol ']}),
dict(name='div', attrs={'class':["guardian-tickets promo-component",]}), dict(name='div', attrs={'id':["article-toolbox","subscribe-feeds",]}),
dict(name='ul', attrs={'class':["pagination"]}), dict(name='div', attrs={'class':["guardian-tickets promo-component",]}),
dict(name='ul', attrs={'id':["content-actions"]}), dict(name='ul', attrs={'class':["pagination"]}),
# article history link dict(name='ul', attrs={'id':["content-actions"]}),
dict(name='a', attrs={'class':["rollover history-link"]}), # article history link
# "a version of this article ..." speil dict(name='a', attrs={'class':["rollover history-link"]}),
dict(name='div' , attrs={'class' : ['section']}), # "a version of this article ..." speil
# "about this article" js dialog dict(name='div' , attrs={'class' : ['section']}),
dict(name='div', attrs={'class':["share-top",]}), # "about this article" js dialog
# author picture dict(name='div', attrs={'class':["share-top",]}),
dict(name='img', attrs={'class':["contributor-pic-small"]}), # author picture
# embedded videos/captions dict(name='img', attrs={'class':["contributor-pic-small"]}),
dict(name='span',attrs={'class' : ['inline embed embed-media']}), # embedded videos/captions
# dict(name='img'), dict(name='span',attrs={'class' : ['inline embed embed-media']}),
] ]
use_embedded_content = False use_embedded_content = False
no_stylesheets = True no_stylesheets = True