Update Guardian & Observer

2025-07-08 10:44:09 -04:00 · 2023-07-12 08:06:27 +05:30 · 2023-07-12 08:06:27 +05:30 · 88c92c56f7
commit 88c92c56f7
parent 6c8faf379f
1 changed file with 7 additions and 43 deletions
--- a/recipes/guardian.recipe
+++ b/recipes/guardian.recipe
@@ -20,10 +20,11 @@ def classes(classes):
 class Guardian(BasicNewsRecipe):
    title = u'The Guardian and The Observer'
-    if date.today().weekday() == 6:
+    is_observer = False
        base_url = "https://www.theguardian.com/observer"
    else:
    base_url = "https://www.theguardian.com/uk"
    if date.today().weekday() == 6:
        is_observer = True
        base_url = "https://www.theguardian.com/observer"
    __author__ = 'Kovid Goyal'
    language = 'en_GB'
@@ -89,20 +90,8 @@ class Guardian(BasicNewsRecipe):
        br = BasicNewsRecipe.get_browser(self, *a, **kw)
        return br
-    def get_cover_url(self):
+    def parse_section(self, section_url):
-        coverdate = date.today()
+        soup = self.index_to_soup(section_url)
        if 'observer' in self.base_url:
            cover = (
                    'https://www.thepaperboy.com/frontpages/archive/The_Observer_' + str(coverdate.day) + '_' +
                    str(coverdate.month) + '_' + str(coverdate.year) + '_400.jpg')
        else:
            cover = (
                    'https://www.thepaperboy.com/frontpages/archive/The_Guardian_' + str(coverdate.day) + '_' +
                    str(coverdate.month) + '_' + str(coverdate.year) + '_400.jpg')
        return cover
    def parse_observer_index(self, soup):
        for section in soup.findAll('section'):
            articles = []
            title = self.tag_to_string(section.find('h2'))
@@ -120,32 +109,7 @@ class Guardian(BasicNewsRecipe):
            if articles:
                yield title, articles
    def parse_section(self, section_url, title_prefix=''):
        feeds = []
        soup = self.index_to_soup(section_url)
        if '/observer' in section_url:
            return list(self.parse_observer_index(soup))
        for section in soup.findAll('section'):
            title = title_prefix + self.tag_to_string(section.find(
                attrs={'class': 'fc-container__header__title'})).strip().capitalize()
            self.log('\nFound section:', title)
            if 'Video' in title:
                self.log('=======> Skip section:', title)
                continue
            feeds.append((title, []))
            for li in section.findAll('li'):
                for a in li.findAll('a', attrs={'data-link-name': 'article'}, href=True):
                    title = self.tag_to_string(a).strip()
                    url = a['href']
                    if url.startswith('/'):
                        url = self.base_url.rpartition('/')[0] + url
                    self.log(' ', title, url)
                    feeds[-1][1].append({'title': title, 'url': url})
                    break
        return feeds
    def parse_index(self):
-        feeds = self.parse_section(self.base_url)
+        feeds = list(self.parse_section(self.base_url))
-        feeds += self.parse_section(
+        feeds += list(self.parse_section('https://www.theguardian.com/uk/sport'))
            'https://www.theguardian.com/uk/sport', 'Sport - ')
        return feeds