mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Update Guardian & Observer
This commit is contained in:
parent
6c8faf379f
commit
88c92c56f7
@@ -20,10 +20,11 @@ def classes(classes):
|
||||
class Guardian(BasicNewsRecipe):
|
||||
|
||||
title = u'The Guardian and The Observer'
|
||||
is_observer = False
|
||||
base_url = "https://www.theguardian.com/uk"
|
||||
if date.today().weekday() == 6:
|
||||
is_observer = True
|
||||
base_url = "https://www.theguardian.com/observer"
|
||||
else:
|
||||
base_url = "https://www.theguardian.com/uk"
|
||||
|
||||
__author__ = 'Kovid Goyal'
|
||||
language = 'en_GB'
|
||||
@@ -89,20 +90,8 @@ class Guardian(BasicNewsRecipe):
|
||||
br = BasicNewsRecipe.get_browser(self, *a, **kw)
|
||||
return br
|
||||
|
||||
def get_cover_url(self):
|
||||
coverdate = date.today()
|
||||
if 'observer' in self.base_url:
|
||||
cover = (
|
||||
'https://www.thepaperboy.com/frontpages/archive/The_Observer_' + str(coverdate.day) + '_' +
|
||||
str(coverdate.month) + '_' + str(coverdate.year) + '_400.jpg')
|
||||
else:
|
||||
cover = (
|
||||
'https://www.thepaperboy.com/frontpages/archive/The_Guardian_' + str(coverdate.day) + '_' +
|
||||
str(coverdate.month) + '_' + str(coverdate.year) + '_400.jpg')
|
||||
|
||||
return cover
|
||||
|
||||
def parse_observer_index(self, soup):
|
||||
def parse_section(self, section_url):
|
||||
soup = self.index_to_soup(section_url)
|
||||
for section in soup.findAll('section'):
|
||||
articles = []
|
||||
title = self.tag_to_string(section.find('h2'))
|
||||
@@ -120,32 +109,7 @@ class Guardian(BasicNewsRecipe):
|
||||
if articles:
|
||||
yield title, articles
|
||||
|
||||
def parse_section(self, section_url, title_prefix=''):
|
||||
feeds = []
|
||||
soup = self.index_to_soup(section_url)
|
||||
if '/observer' in section_url:
|
||||
return list(self.parse_observer_index(soup))
|
||||
for section in soup.findAll('section'):
|
||||
title = title_prefix + self.tag_to_string(section.find(
|
||||
attrs={'class': 'fc-container__header__title'})).strip().capitalize()
|
||||
self.log('\nFound section:', title)
|
||||
if 'Video' in title:
|
||||
self.log('=======> Skip section:', title)
|
||||
continue
|
||||
feeds.append((title, []))
|
||||
for li in section.findAll('li'):
|
||||
for a in li.findAll('a', attrs={'data-link-name': 'article'}, href=True):
|
||||
title = self.tag_to_string(a).strip()
|
||||
url = a['href']
|
||||
if url.startswith('/'):
|
||||
url = self.base_url.rpartition('/')[0] + url
|
||||
self.log(' ', title, url)
|
||||
feeds[-1][1].append({'title': title, 'url': url})
|
||||
break
|
||||
return feeds
|
||||
|
||||
def parse_index(self):
|
||||
feeds = self.parse_section(self.base_url)
|
||||
feeds += self.parse_section(
|
||||
'https://www.theguardian.com/uk/sport', 'Sport - ')
|
||||
feeds = list(self.parse_section(self.base_url))
|
||||
feeds += list(self.parse_section('https://www.theguardian.com/uk/sport'))
|
||||
return feeds
|
||||
|
Loading…
x
Reference in New Issue
Block a user