mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Update The New York Times
This commit is contained in:
parent
4fc540bdaa
commit
37e75fcae1
@ -82,10 +82,12 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(attrs={'aria-label':'tools'.split()}),
|
dict(attrs={'aria-label':'tools'.split()}),
|
||||||
|
dict(attrs={'aria-hidden':'true'}),
|
||||||
dict(attrs={'data-videoid':True}),
|
dict(attrs={'data-videoid':True}),
|
||||||
dict(name='button meta link'.split()),
|
dict(name='button meta link'.split()),
|
||||||
dict(id=lambda x: x and x.startswith('story-ad-')),
|
dict(id=lambda x: x and x.startswith('story-ad-')),
|
||||||
dict(name='head'),
|
dict(name='head'),
|
||||||
|
dict(role='toolbar'),
|
||||||
dict(name='a', href=lambda x: x and '#story-continues-' in x),
|
dict(name='a', href=lambda x: x and '#story-continues-' in x),
|
||||||
dict(name='a', href=lambda x: x and '#whats-next' in x),
|
dict(name='a', href=lambda x: x and '#whats-next' in x),
|
||||||
dict(id=lambda x: x and 'sharetools-' in x),
|
dict(id=lambda x: x and 'sharetools-' in x),
|
||||||
@ -106,7 +108,7 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(id='story')
|
dict(id='story'),
|
||||||
]
|
]
|
||||||
body = Tag(soup, 'body')
|
body = Tag(soup, 'body')
|
||||||
for spec in keep_only_tags:
|
for spec in keep_only_tags:
|
||||||
@ -114,12 +116,6 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
body.insert(len(body.contents), tag)
|
body.insert(len(body.contents), tag)
|
||||||
soup.find('body').replaceWith(body)
|
soup.find('body').replaceWith(body)
|
||||||
|
|
||||||
# Remove the header bar with New York Times as an SVG in it
|
|
||||||
for svg in soup.findAll('svg'):
|
|
||||||
h = svg.findParent('header')
|
|
||||||
if h is not None:
|
|
||||||
h.extract()
|
|
||||||
|
|
||||||
# Add a space to the dateline
|
# Add a space to the dateline
|
||||||
t = soup.find(**classes('dateline'))
|
t = soup.find(**classes('dateline'))
|
||||||
if t is not None:
|
if t is not None:
|
||||||
|
@ -82,10 +82,12 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(attrs={'aria-label':'tools'.split()}),
|
dict(attrs={'aria-label':'tools'.split()}),
|
||||||
|
dict(attrs={'aria-hidden':'true'}),
|
||||||
dict(attrs={'data-videoid':True}),
|
dict(attrs={'data-videoid':True}),
|
||||||
dict(name='button meta link'.split()),
|
dict(name='button meta link'.split()),
|
||||||
dict(id=lambda x: x and x.startswith('story-ad-')),
|
dict(id=lambda x: x and x.startswith('story-ad-')),
|
||||||
dict(name='head'),
|
dict(name='head'),
|
||||||
|
dict(role='toolbar'),
|
||||||
dict(name='a', href=lambda x: x and '#story-continues-' in x),
|
dict(name='a', href=lambda x: x and '#story-continues-' in x),
|
||||||
dict(name='a', href=lambda x: x and '#whats-next' in x),
|
dict(name='a', href=lambda x: x and '#whats-next' in x),
|
||||||
dict(id=lambda x: x and 'sharetools-' in x),
|
dict(id=lambda x: x and 'sharetools-' in x),
|
||||||
@ -106,7 +108,7 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(id='story')
|
dict(id='story'),
|
||||||
]
|
]
|
||||||
body = Tag(soup, 'body')
|
body = Tag(soup, 'body')
|
||||||
for spec in keep_only_tags:
|
for spec in keep_only_tags:
|
||||||
@ -114,12 +116,6 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
body.insert(len(body.contents), tag)
|
body.insert(len(body.contents), tag)
|
||||||
soup.find('body').replaceWith(body)
|
soup.find('body').replaceWith(body)
|
||||||
|
|
||||||
# Remove the header bar with New York Times as an SVG in it
|
|
||||||
for svg in soup.findAll('svg'):
|
|
||||||
h = svg.findParent('header')
|
|
||||||
if h is not None:
|
|
||||||
h.extract()
|
|
||||||
|
|
||||||
# Add a space to the dateline
|
# Add a space to the dateline
|
||||||
t = soup.find(**classes('dateline'))
|
t = soup.find(**classes('dateline'))
|
||||||
if t is not None:
|
if t is not None:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user