Update wash_post_print.recipe

This commit is contained in:
unkn0w7n 2023-05-08 20:43:01 +05:30 committed by GitHub
parent b10c910107
commit 63101aa07a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -37,23 +37,23 @@ class wapoprint(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
soup = self.index_to_soup('https://www.washingtonpost.com/todays_paper/updates/') soup = self.index_to_soup('https://www.washingtonpost.com/todays_paper/updates/')
if cover := soup.find('div', attrs={'class':lambda x: x and 'todays-content-image' in x.split()}): if img := soup.find('img', attrs={'src':lambda x: x and x.endswith('_FrontPage.png')}):
self.cover_url = cover.img['src'] self.cover_url = img['src']
main = soup.find('div', attrs={'id':'todays-paper'})
feeds = [] feeds = []
for div in main.findAll('div', attrs={'class': lambda x: x and 'todays-content' in x.split()}): for div in soup.findAll('section', attrs={'id': True}):
h2 = div.find('p', attrs={'class':lambda x: x and 'heading2' in x.split()}) secname = self.tag_to_string(div.find('label')).strip()
secname = self.tag_to_string(h2)
self.log(secname) self.log(secname)
articles = [] articles = []
for a in div.findAll('a', href=True, attrs={'class':'headline'}): for a in div.findAll('a', href=True):
url = a['href'] url = a['href']
title = self.tag_to_string(a) title = self.tag_to_string(a).strip()
articles.append({'title': title, 'url': url}) if not title or not url:
continue
self.log('\t', title) self.log('\t', title)
self.log('\t\t', url) self.log('\t\t', url)
articles.append({'title': title, 'url': url})
if articles: if articles:
feeds.append((secname, articles)) feeds.append((secname, articles))
return feeds return feeds