From ffd1af3ddb213ecc4d21d233a3af5a47953733aa Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 6 Aug 2023 10:20:36 +0530 Subject: [PATCH] Update NYTimes --- recipes/nytimes.recipe | 14 +++++++++----- recipes/nytimes_sub.recipe | 14 +++++++++----- 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe index 0f9c866e4d..d11c5cc92a 100644 --- a/recipes/nytimes.recipe +++ b/recipes/nytimes.recipe @@ -200,14 +200,18 @@ class NewYorkTimes(BasicNewsRecipe): if article is None: a = li.find('a', href=True) if a is not None: - title = self.tag_to_string(li.find('h3')) + title = self.tag_to_string(li.find(['h3', 'h2'])).strip() + paras = li.findAll('p') + if not title: + title = self.tag_to_string(paras[0]).strip() + if not title: + raise ValueError('No title found in article') url = a['href'] if url.startswith('/'): url = 'https://www.nytimes.com' + url desc = '' - p = li.find('p') - if p is not None: - desc = self.tag_to_string(p) + if len(paras) > 0: + desc = self.tag_to_string(paras[-1]) date = '' d = date_from_url(url) if d is not None: @@ -219,7 +223,7 @@ class NewYorkTimes(BasicNewsRecipe): continue yield {'title': title, 'url': url, 'description': desc, 'date': date} continue - h2 = article.find('h2') + h2 = article.find(['h2', 'h3']) if h2 is not None: title = self.tag_to_string(h2) a = h2.find('a', href=True) diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index 5db1a02a57..36455d2d83 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -200,14 +200,18 @@ class NewYorkTimes(BasicNewsRecipe): if article is None: a = li.find('a', href=True) if a is not None: - title = self.tag_to_string(li.find('h3')) + title = self.tag_to_string(li.find(['h3', 'h2'])).strip() + paras = li.findAll('p') + if not title: + title = self.tag_to_string(paras[0]).strip() + if not title: + raise ValueError('No title found in article') url = a['href'] if url.startswith('/'): url = 'https://www.nytimes.com' + url desc = '' - p = li.find('p') - if p is not None: - desc = self.tag_to_string(p) + if len(paras) > 0: + desc = self.tag_to_string(paras[-1]) date = '' d = date_from_url(url) if d is not None: @@ -219,7 +223,7 @@ class NewYorkTimes(BasicNewsRecipe): continue yield {'title': title, 'url': url, 'description': desc, 'date': date} continue - h2 = article.find('h2') + h2 = article.find(['h2', 'h3']) if h2 is not None: title = self.tag_to_string(h2) a = h2.find('a', href=True)