Update The New York Times

This commit is contained in:
Kovid Goyal 2018-05-24 19:04:21 +05:30
parent 4fc540bdaa
commit 37e75fcae1
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 6 additions and 14 deletions

View File

@@ -82,10 +82,12 @@ class NewYorkTimes(BasicNewsRecipe):
remove_tags = [
dict(attrs={'aria-label':'tools'.split()}),
dict(attrs={'aria-hidden':'true'}),
dict(attrs={'data-videoid':True}),
dict(name='button meta link'.split()),
dict(id=lambda x: x and x.startswith('story-ad-')),
dict(name='head'),
dict(role='toolbar'),
dict(name='a', href=lambda x: x and '#story-continues-' in x),
dict(name='a', href=lambda x: x and '#whats-next' in x),
dict(id=lambda x: x and 'sharetools-' in x),
@@ -106,7 +108,7 @@ class NewYorkTimes(BasicNewsRecipe):
]
else:
keep_only_tags = [
dict(id='story')
dict(id='story'),
]
body = Tag(soup, 'body')
for spec in keep_only_tags:
@@ -114,12 +116,6 @@ class NewYorkTimes(BasicNewsRecipe):
body.insert(len(body.contents), tag)
soup.find('body').replaceWith(body)
# Remove the header bar with New York Times as an SVG in it
for svg in soup.findAll('svg'):
h = svg.findParent('header')
if h is not None:
h.extract()
# Add a space to the dateline
t = soup.find(**classes('dateline'))
if t is not None:

View File

@@ -82,10 +82,12 @@ class NewYorkTimes(BasicNewsRecipe):
remove_tags = [
dict(attrs={'aria-label':'tools'.split()}),
dict(attrs={'aria-hidden':'true'}),
dict(attrs={'data-videoid':True}),
dict(name='button meta link'.split()),
dict(id=lambda x: x and x.startswith('story-ad-')),
dict(name='head'),
dict(role='toolbar'),
dict(name='a', href=lambda x: x and '#story-continues-' in x),
dict(name='a', href=lambda x: x and '#whats-next' in x),
dict(id=lambda x: x and 'sharetools-' in x),
@@ -106,7 +108,7 @@ class NewYorkTimes(BasicNewsRecipe):
]
else:
keep_only_tags = [
dict(id='story')
dict(id='story'),
]
body = Tag(soup, 'body')
for spec in keep_only_tags:
@@ -114,12 +116,6 @@ class NewYorkTimes(BasicNewsRecipe):
body.insert(len(body.contents), tag)
soup.find('body').replaceWith(body)
# Remove the header bar with New York Times as an SVG in it
for svg in soup.findAll('svg'):
h = svg.findParent('header')
if h is not None:
h.extract()
# Add a space to the dateline
t = soup.find(**classes('dateline'))
if t is not None: