mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update NYTimes
This commit is contained in:
parent
009f8f33ad
commit
ffd1af3ddb
@ -200,14 +200,18 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
if article is None:
|
if article is None:
|
||||||
a = li.find('a', href=True)
|
a = li.find('a', href=True)
|
||||||
if a is not None:
|
if a is not None:
|
||||||
title = self.tag_to_string(li.find('h3'))
|
title = self.tag_to_string(li.find(['h3', 'h2'])).strip()
|
||||||
|
paras = li.findAll('p')
|
||||||
|
if not title:
|
||||||
|
title = self.tag_to_string(paras[0]).strip()
|
||||||
|
if not title:
|
||||||
|
raise ValueError('No title found in article')
|
||||||
url = a['href']
|
url = a['href']
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'https://www.nytimes.com' + url
|
url = 'https://www.nytimes.com' + url
|
||||||
desc = ''
|
desc = ''
|
||||||
p = li.find('p')
|
if len(paras) > 0:
|
||||||
if p is not None:
|
desc = self.tag_to_string(paras[-1])
|
||||||
desc = self.tag_to_string(p)
|
|
||||||
date = ''
|
date = ''
|
||||||
d = date_from_url(url)
|
d = date_from_url(url)
|
||||||
if d is not None:
|
if d is not None:
|
||||||
@ -219,7 +223,7 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
yield {'title': title, 'url': url, 'description': desc, 'date': date}
|
yield {'title': title, 'url': url, 'description': desc, 'date': date}
|
||||||
continue
|
continue
|
||||||
h2 = article.find('h2')
|
h2 = article.find(['h2', 'h3'])
|
||||||
if h2 is not None:
|
if h2 is not None:
|
||||||
title = self.tag_to_string(h2)
|
title = self.tag_to_string(h2)
|
||||||
a = h2.find('a', href=True)
|
a = h2.find('a', href=True)
|
||||||
|
@ -200,14 +200,18 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
if article is None:
|
if article is None:
|
||||||
a = li.find('a', href=True)
|
a = li.find('a', href=True)
|
||||||
if a is not None:
|
if a is not None:
|
||||||
title = self.tag_to_string(li.find('h3'))
|
title = self.tag_to_string(li.find(['h3', 'h2'])).strip()
|
||||||
|
paras = li.findAll('p')
|
||||||
|
if not title:
|
||||||
|
title = self.tag_to_string(paras[0]).strip()
|
||||||
|
if not title:
|
||||||
|
raise ValueError('No title found in article')
|
||||||
url = a['href']
|
url = a['href']
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = 'https://www.nytimes.com' + url
|
url = 'https://www.nytimes.com' + url
|
||||||
desc = ''
|
desc = ''
|
||||||
p = li.find('p')
|
if len(paras) > 0:
|
||||||
if p is not None:
|
desc = self.tag_to_string(paras[-1])
|
||||||
desc = self.tag_to_string(p)
|
|
||||||
date = ''
|
date = ''
|
||||||
d = date_from_url(url)
|
d = date_from_url(url)
|
||||||
if d is not None:
|
if d is not None:
|
||||||
@ -219,7 +223,7 @@ class NewYorkTimes(BasicNewsRecipe):
|
|||||||
continue
|
continue
|
||||||
yield {'title': title, 'url': url, 'description': desc, 'date': date}
|
yield {'title': title, 'url': url, 'description': desc, 'date': date}
|
||||||
continue
|
continue
|
||||||
h2 = article.find('h2')
|
h2 = article.find(['h2', 'h3'])
|
||||||
if h2 is not None:
|
if h2 is not None:
|
||||||
title = self.tag_to_string(h2)
|
title = self.tag_to_string(h2)
|
||||||
a = h2.find('a', href=True)
|
a = h2.find('a', href=True)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user