mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update New York Times
This commit is contained in:
parent
32e83b742f
commit
4e0ada41f5
@@ -79,9 +79,12 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
compress_news_images = True
|
compress_news_images = True
|
||||||
compress_news_images_auto_size = 5
|
compress_news_images_auto_size = 5
|
||||||
|
remove_attributes = ['style']
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(attrs={'aria-label':'tools'.split()}),
|
dict(attrs={'aria-label':'tools'.split()}),
|
||||||
|
dict(attrs={'aria-label': lambda x: x and 'New York Times Logo' in x}),
|
||||||
|
dict(href='#site-content #site-index'.split()),
|
||||||
dict(attrs={'aria-hidden':'true'}),
|
dict(attrs={'aria-hidden':'true'}),
|
||||||
dict(attrs={'data-videoid':True}),
|
dict(attrs={'data-videoid':True}),
|
||||||
dict(name='button meta link'.split()),
|
dict(name='button meta link'.split()),
|
||||||
@@ -125,6 +128,12 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
for li in soup.findAll('li', attrs={'class': lambda x: x and x.startswith('css-')}):
|
for li in soup.findAll('li', attrs={'class': lambda x: x and x.startswith('css-')}):
|
||||||
if not li.contents and not li.string:
|
if not li.contents and not li.string:
|
||||||
li.extract()
|
li.extract()
|
||||||
|
|
||||||
|
# Ensure the headline is first
|
||||||
|
h1 = soup.find('h1', itemprop='headline')
|
||||||
|
if h1 is not None:
|
||||||
|
h1.extract()
|
||||||
|
soup.find('body').contents.insert(0, h1)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def read_nyt_metadata(self):
|
def read_nyt_metadata(self):
|
||||||
@@ -240,6 +249,9 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
# return [('All articles', [
|
||||||
|
# {'title': 'XXXXX', 'url': 'https://www.nytimes.com/2018/05/31/theater/best-25-plays-how-we-made-the-list.html'},
|
||||||
|
# ])]
|
||||||
if is_web_edition:
|
if is_web_edition:
|
||||||
return self.parse_web_sections()
|
return self.parse_web_sections()
|
||||||
return self.parse_todays_page()
|
return self.parse_todays_page()
|
||||||
|
@@ -79,9 +79,12 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
compress_news_images = True
|
compress_news_images = True
|
||||||
compress_news_images_auto_size = 5
|
compress_news_images_auto_size = 5
|
||||||
|
remove_attributes = ['style']
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(attrs={'aria-label':'tools'.split()}),
|
dict(attrs={'aria-label':'tools'.split()}),
|
||||||
|
dict(attrs={'aria-label': lambda x: x and 'New York Times Logo' in x}),
|
||||||
|
dict(href='#site-content #site-index'.split()),
|
||||||
dict(attrs={'aria-hidden':'true'}),
|
dict(attrs={'aria-hidden':'true'}),
|
||||||
dict(attrs={'data-videoid':True}),
|
dict(attrs={'data-videoid':True}),
|
||||||
dict(name='button meta link'.split()),
|
dict(name='button meta link'.split()),
|
||||||
@@ -125,6 +128,12 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
for li in soup.findAll('li', attrs={'class': lambda x: x and x.startswith('css-')}):
|
for li in soup.findAll('li', attrs={'class': lambda x: x and x.startswith('css-')}):
|
||||||
if not li.contents and not li.string:
|
if not li.contents and not li.string:
|
||||||
li.extract()
|
li.extract()
|
||||||
|
|
||||||
|
# Ensure the headline is first
|
||||||
|
h1 = soup.find('h1', itemprop='headline')
|
||||||
|
if h1 is not None:
|
||||||
|
h1.extract()
|
||||||
|
soup.find('body').contents.insert(0, h1)
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def read_nyt_metadata(self):
|
def read_nyt_metadata(self):
|
||||||
@@ -240,6 +249,9 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
# return [('All articles', [
|
||||||
|
# {'title': 'XXXXX', 'url': 'https://www.nytimes.com/2018/05/31/theater/best-25-plays-how-we-made-the-list.html'},
|
||||||
|
# ])]
|
||||||
if is_web_edition:
|
if is_web_edition:
|
||||||
return self.parse_web_sections()
|
return self.parse_web_sections()
|
||||||
return self.parse_todays_page()
|
return self.parse_todays_page()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user