From 88c92c56f7ecda281bc07c1c8e57cedc5739c7f9 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 12 Jul 2023 08:06:27 +0530
Subject: [PATCH] Update Guardian & Observer

---
 recipes/guardian.recipe | 50 ++++++-----------------
 1 file changed, 7 insertions(+), 43 deletions(-)

diff --git a/recipes/guardian.recipe b/recipes/guardian.recipe
index bab2a67b86..bdc9425306 100644
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@@ -20,10 +20,11 @@ def classes(classes):
 class Guardian(BasicNewsRecipe):
 
     title = u'The Guardian and The Observer'
+    is_observer = False
+    base_url = "https://www.theguardian.com/uk"
     if date.today().weekday() == 6:
+        is_observer = True
         base_url = "https://www.theguardian.com/observer"
-    else:
-        base_url = "https://www.theguardian.com/uk"
 
     __author__ = 'Kovid Goyal'
     language = 'en_GB'
@@ -89,20 +90,8 @@ class Guardian(BasicNewsRecipe):
         br = BasicNewsRecipe.get_browser(self, *a, **kw)
         return br
 
-    def get_cover_url(self):
-        coverdate = date.today()
-        if 'observer' in self.base_url:
-            cover = (
-                'https://www.thepaperboy.com/frontpages/archive/The_Observer_' + str(coverdate.day) + '_' +
-                str(coverdate.month) + '_' + str(coverdate.year) + '_400.jpg')
-        else:
-            cover = (
-                'https://www.thepaperboy.com/frontpages/archive/The_Guardian_' + str(coverdate.day) + '_' +
-                str(coverdate.month) + '_' + str(coverdate.year) + '_400.jpg')
-
-        return cover
-
-    def parse_observer_index(self, soup):
+    def parse_section(self, section_url):
+        soup = self.index_to_soup(section_url)
         for section in soup.findAll('section'):
             articles = []
             title = self.tag_to_string(section.find('h2'))
@@ -120,32 +109,7 @@ class Guardian(BasicNewsRecipe):
             if articles:
                 yield title, articles
 
-    def parse_section(self, section_url, title_prefix=''):
-        feeds = []
-        soup = self.index_to_soup(section_url)
-        if '/observer' in section_url:
-            return list(self.parse_observer_index(soup))
-        for section in soup.findAll('section'):
-            title = title_prefix + self.tag_to_string(section.find(
-                attrs={'class': 'fc-container__header__title'})).strip().capitalize()
-            self.log('\nFound section:', title)
-            if 'Video' in title:
-                self.log('=======> Skip section:', title)
-                continue
-            feeds.append((title, []))
-            for li in section.findAll('li'):
-                for a in li.findAll('a', attrs={'data-link-name': 'article'}, href=True):
-                    title = self.tag_to_string(a).strip()
-                    url = a['href']
-                    if url.startswith('/'):
-                        url = self.base_url.rpartition('/')[0] + url
-                    self.log(' ', title, url)
-                    feeds[-1][1].append({'title': title, 'url': url})
-                break
-        return feeds
-
     def parse_index(self):
-        feeds = self.parse_section(self.base_url)
-        feeds += self.parse_section(
-            'https://www.theguardian.com/uk/sport', 'Sport - ')
+        feeds = list(self.parse_section(self.base_url))
+        feeds += list(self.parse_section('https://www.theguardian.com/uk/sport'))
         return feeds