From 88c92c56f7ecda281bc07c1c8e57cedc5739c7f9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jul 2023 08:06:27 +0530
Subject: [PATCH] Update Guardian & Observer

---
 recipes/guardian.recipe | 50 ++++++-----------------
 1 file changed, 7 insertions(+), 43 deletions(-)

diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe
index bab2a67b86..bdc9425306 100644
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@@ -20,10 +20,11 @@ def classes(classes):
 class Guardian(BasicNewsRecipe):
 
     title = u'The Guardian and The Observer'
+    is_observer = False
+    base_url = "https://www.theguardian.com/uk"
     if date.today().weekday() == 6:
+        is_observer = True
         base_url = "https://www.theguardian.com/observer"
-    else:
-        base_url = "https://www.theguardian.com/uk"
 
     __author__ = 'Kovid Goyal'
     language = 'en_GB'
@@ -89,20 +90,8 @@ class Guardian(BasicNewsRecipe):
         br = BasicNewsRecipe.get_browser(self, *a, **kw)
         return br
 
-    def get_cover_url(self):
-        coverdate = date.today()
-        if 'observer' in self.base_url:
-            cover = (
-                'https://www.thepaperboy.com/frontpages/archive/The_Observer_' + str(coverdate.day) + '_' +
-                str(coverdate.month) + '_' + str(coverdate.year) + '_400.jpg')
-        else:
-            cover = (
-                'https://www.thepaperboy.com/frontpages/archive/The_Guardian_' + str(coverdate.day) + '_' +
-                str(coverdate.month) + '_' + str(coverdate.year) + '_400.jpg')
-
-        return cover
-
-    def parse_observer_index(self, soup):
+    def parse_section(self, section_url):
+        soup = self.index_to_soup(section_url)
         for section in soup.findAll('section'):
             articles = []
             title = self.tag_to_string(section.find('h2'))
@@ -120,32 +109,7 @@ class Guardian(BasicNewsRecipe):
             if articles:
                 yield title, articles
 
-    def parse_section(self, section_url, title_prefix=''):
-        feeds = []
-        soup = self.index_to_soup(section_url)
-        if '/observer' in section_url:
-            return list(self.parse_observer_index(soup))
-        for section in soup.findAll('section'):
-            title = title_prefix + self.tag_to_string(section.find(
-                attrs={'class': 'fc-container__header__title'})).strip().capitalize()
-            self.log('\nFound section:', title)
-            if 'Video' in title:
-                self.log('=======> Skip section:', title)
-                continue
-            feeds.append((title, []))
-            for li in section.findAll('li'):
-                for a in li.findAll('a', attrs={'data-link-name': 'article'}, href=True):
-                    title = self.tag_to_string(a).strip()
-                    url = a['href']
-                    if url.startswith('/'):
-                        url = self.base_url.rpartition('/')[0] + url
-                    self.log(' ', title, url)
-                    feeds[-1][1].append({'title': title, 'url': url})
-                break
-        return feeds
-
     def parse_index(self):
-        feeds = self.parse_section(self.base_url)
-        feeds += self.parse_section(
-            'https://www.theguardian.com/uk/sport', 'Sport - ')
+        feeds = list(self.parse_section(self.base_url))
+        feeds += list(self.parse_section('https://www.theguardian.com/uk/sport'))
         return feeds