mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Update Guardian & Observer
This commit is contained in:
parent
73ecd2a53d
commit
205c8e1f95
@ -102,9 +102,29 @@ class Guardian(BasicNewsRecipe):
|
|||||||
|
|
||||||
return cover
|
return cover
|
||||||
|
|
||||||
def parse_section(self, url, title_prefix=''):
|
def parse_observer_index(self, soup):
    """Yield ``(title, articles)`` pairs for each usable section of *soup*.

    Every ``<section>`` element in *soup* is inspected. A section is
    skipped when the text extracted from its first ``<h2>`` is empty, or
    when none of its ``<li>`` items contains a link carrying both an
    ``href`` and an ``aria-label`` attribute.

    :param soup: parsed index page exposing ``findAll`` (presumably the
        soup returned by ``self.index_to_soup`` -- confirm against caller).
    :return: generator of ``(title, articles)`` tuples, where ``articles``
        is a list of ``{'title': ..., 'url': ...}`` dicts; the article
        title is taken from the link's ``aria-label``.
    """
    # Function-scope import so the method does not depend on the file's
    # import block, which is not visible from here.
    from urllib.parse import urljoin

    for section in soup.findAll('section'):
        # NOTE(review): relies on tag_to_string() returning a falsy value
        # when the section has no <h2> -- confirm.
        title = self.tag_to_string(section.find('h2'))
        if not title:
            continue
        self.log('Found section:', title)

        articles = []
        for li in section.findAll('li'):
            a = li.find('a', attrs={'href': True, 'aria-label': True})
            if not a:
                continue
            url = a['href']
            if url.startswith('/'):
                # urljoin resolves root-relative ('/path') and
                # protocol-relative ('//host/path') links correctly, and
                # does not depend on base_url having a path component,
                # unlike stripping the last '/'-separated piece by hand.
                url = urljoin(self.base_url, url)
            label = a['aria-label']
            self.log('\t', label, url)
            articles.append({'title': label, 'url': url})

        if articles:
            yield title, articles
|
||||||
|
|
||||||
|
def parse_section(self, section_url, title_prefix=''):
|
||||||
feeds = []
|
feeds = []
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(section_url)
|
||||||
|
if '/observer' in section_url:
|
||||||
|
return list(self.parse_observer_index(soup))
|
||||||
for section in soup.findAll('section'):
|
for section in soup.findAll('section'):
|
||||||
title = title_prefix + self.tag_to_string(section.find(
|
title = title_prefix + self.tag_to_string(section.find(
|
||||||
attrs={'class': 'fc-container__header__title'})).strip().capitalize()
|
attrs={'class': 'fc-container__header__title'})).strip().capitalize()
|
||||||
@ -117,6 +137,8 @@ class Guardian(BasicNewsRecipe):
|
|||||||
for a in li.findAll('a', attrs={'data-link-name': 'article'}, href=True):
|
for a in li.findAll('a', attrs={'data-link-name': 'article'}, href=True):
|
||||||
title = self.tag_to_string(a).strip()
|
title = self.tag_to_string(a).strip()
|
||||||
url = a['href']
|
url = a['href']
|
||||||
|
if url.startswith('/'):
|
||||||
|
url = self.base_url.rpartition('/')[0] + url
|
||||||
self.log(' ', title, url)
|
self.log(' ', title, url)
|
||||||
feeds[-1][1].append({'title': title, 'url': url})
|
feeds[-1][1].append({'title': title, 'url': url})
|
||||||
break
|
break
|
||||||
|
Loading…
x
Reference in New Issue
Block a user