Update Guardian & Observer

This commit is contained in:
Kovid Goyal 2023-07-12 08:06:27 +05:30
parent 6c8faf379f
commit 88c92c56f7
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -20,10 +20,11 @@ def classes(classes):
class Guardian(BasicNewsRecipe): class Guardian(BasicNewsRecipe):
title = u'The Guardian and The Observer' title = u'The Guardian and The Observer'
if date.today().weekday() == 6: is_observer = False
base_url = "https://www.theguardian.com/observer"
else:
base_url = "https://www.theguardian.com/uk" base_url = "https://www.theguardian.com/uk"
if date.today().weekday() == 6:
is_observer = True
base_url = "https://www.theguardian.com/observer"
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal'
language = 'en_GB' language = 'en_GB'
@@ -89,20 +90,8 @@ class Guardian(BasicNewsRecipe):
br = BasicNewsRecipe.get_browser(self, *a, **kw) br = BasicNewsRecipe.get_browser(self, *a, **kw)
return br return br
def get_cover_url(self): def parse_section(self, section_url):
coverdate = date.today() soup = self.index_to_soup(section_url)
if 'observer' in self.base_url:
cover = (
'https://www.thepaperboy.com/frontpages/archive/The_Observer_' + str(coverdate.day) + '_' +
str(coverdate.month) + '_' + str(coverdate.year) + '_400.jpg')
else:
cover = (
'https://www.thepaperboy.com/frontpages/archive/The_Guardian_' + str(coverdate.day) + '_' +
str(coverdate.month) + '_' + str(coverdate.year) + '_400.jpg')
return cover
def parse_observer_index(self, soup):
for section in soup.findAll('section'): for section in soup.findAll('section'):
articles = [] articles = []
title = self.tag_to_string(section.find('h2')) title = self.tag_to_string(section.find('h2'))
@@ -120,32 +109,7 @@ class Guardian(BasicNewsRecipe):
if articles: if articles:
yield title, articles yield title, articles
def parse_section(self, section_url, title_prefix=''):
feeds = []
soup = self.index_to_soup(section_url)
if '/observer' in section_url:
return list(self.parse_observer_index(soup))
for section in soup.findAll('section'):
title = title_prefix + self.tag_to_string(section.find(
attrs={'class': 'fc-container__header__title'})).strip().capitalize()
self.log('\nFound section:', title)
if 'Video' in title:
self.log('=======> Skip section:', title)
continue
feeds.append((title, []))
for li in section.findAll('li'):
for a in li.findAll('a', attrs={'data-link-name': 'article'}, href=True):
title = self.tag_to_string(a).strip()
url = a['href']
if url.startswith('/'):
url = self.base_url.rpartition('/')[0] + url
self.log(' ', title, url)
feeds[-1][1].append({'title': title, 'url': url})
break
return feeds
def parse_index(self): def parse_index(self):
feeds = self.parse_section(self.base_url) feeds = list(self.parse_section(self.base_url))
feeds += self.parse_section( feeds += list(self.parse_section('https://www.theguardian.com/uk/sport'))
'https://www.theguardian.com/uk/sport', 'Sport - ')
return feeds return feeds