mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Update NYTimes Tech Beat
This commit is contained in:
parent
4e0ada41f5
commit
924acd1d0c
@ -19,7 +19,7 @@ def classes(classes):
|
|||||||
class NYTimesTechnology(BasicNewsRecipe):
|
class NYTimesTechnology(BasicNewsRecipe):
|
||||||
title = 'New York Times Technology Beat'
|
title = 'New York Times Technology Beat'
|
||||||
language = 'en'
|
language = 'en'
|
||||||
description = 'The latest in technology from David Pogue'
|
description = 'The latest in technology - Gadgetwise'
|
||||||
publisher = 'The New York Times'
|
publisher = 'The New York Times'
|
||||||
category = 'Technology'
|
category = 'Technology'
|
||||||
oldest_article = 14
|
oldest_article = 14
|
||||||
@ -31,9 +31,41 @@ class NYTimesTechnology(BasicNewsRecipe):
|
|||||||
(u'Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'),
|
(u'Gadgetwise', u'http://gadgetwise.blogs.nytimes.com/feed/'),
|
||||||
]
|
]
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='h1'),
|
dict(id='story'),
|
||||||
classes('extended-byline story-body'),
|
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
classes('visually-hidden newsletter-signup nocontent robots-nocontent hidden'),
|
dict(attrs={'aria-label':'tools'.split()}),
|
||||||
|
dict(attrs={'aria-label': lambda x: x and 'New York Times Logo' in x}),
|
||||||
|
dict(href='#site-content #site-index'.split()),
|
||||||
|
dict(attrs={'aria-hidden':'true'}),
|
||||||
|
dict(attrs={'data-videoid':True}),
|
||||||
|
dict(name='button meta link'.split()),
|
||||||
|
dict(id=lambda x: x and x.startswith('story-ad-')),
|
||||||
|
dict(name='head'),
|
||||||
|
dict(role='toolbar'),
|
||||||
|
dict(name='a', href=lambda x: x and '#story-continues-' in x),
|
||||||
|
dict(name='a', href=lambda x: x and '#whats-next' in x),
|
||||||
|
dict(id=lambda x: x and 'sharetools-' in x),
|
||||||
|
dict(id='newsletter-promo supported-by-ad bottom-wrapper'.split()),
|
||||||
|
classes('story-print-citation supported-by accessibility-ad-header visually-hidden bottom-of-article ad'),
|
||||||
|
dict(attrs={'class': lambda x: x and (
|
||||||
|
'SectionBar' in x or 'recirculation' in x or 'ResponsiveAd' in x or 'accessibility-visuallyHidden' in x or 'RelatedCoverage' in x)}),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Tidy the parsed article tree and return it.

    Three clean-ups are applied, in order:

    1. A single space is inserted at the start of the first element
       matched by ``classes('dateline')`` (``classes`` is a helper
       defined elsewhere in this file).
    2. ``<li>`` elements whose class starts with ``css-`` and that have
       no children and no string are removed.
    3. The ``<h1 itemprop="headline">`` element, if any, is moved to be
       the first child of ``<body>``.

    :param soup: the parsed article document (Beautiful Soup tree).
    :return: the same ``soup`` object, modified in place.
    """
    # Add a space to the dateline
    dateline = soup.find(**classes('dateline'))
    if dateline is not None:
        dateline.insert(0, ' ')

    # Remove empty li tags
    for li in soup.findAll('li', attrs={'class': lambda x: x and x.startswith('css-')}):
        if not li.contents and not li.string:
            li.extract()

    # Ensure the headline is first
    h1 = soup.find('h1', itemprop='headline')
    body = soup.find('body')
    # Only detach the headline when there is a <body> to re-attach it to;
    # otherwise the headline would be dropped from the document.
    if h1 is not None and body is not None:
        h1.extract()
        # Use Tag.insert() rather than mutating body.contents directly so
        # the parent/sibling links of the moved tag are kept consistent.
        body.insert(0, h1)
    return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user