Update Fortune Magazine

This commit is contained in:
Kovid Goyal 2019-10-04 09:41:00 +05:30
parent 749460cbfc
commit 25e3c0a3ba
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -7,6 +7,19 @@ def classes(classes):
'class': lambda x: x and frozenset(x.split()).intersection(q)}) 'class': lambda x: x and frozenset(x.split()).intersection(q)})
def prefix_classes(classes):
    """Return a BeautifulSoup ``findAll``-style attrs matcher that accepts an
    element whose ``class`` attribute contains at least one class name
    starting with any of the space-separated prefixes in *classes*.

    :param classes: space-separated class-name prefixes, e.g.
        ``'articleHeader__title-- articleBody__wrapper--'``
    :return: a ``dict`` of the form ``{'attrs': {'class': predicate}}``
    """
    # str.startswith accepts a tuple of prefixes, so one call per class
    # name replaces the inner loop over individual prefixes.
    prefixes = tuple(classes.split())

    def test(x):
        # x is the raw class-attribute string, or None/'' when absent.
        return bool(x) and any(cls.startswith(prefixes) for cls in x.split())

    return dict(attrs={'class': test})
class Fortune(BasicNewsRecipe): class Fortune(BasicNewsRecipe):
title = 'Fortune Magazine' title = 'Fortune Magazine'
@ -17,9 +30,9 @@ class Fortune(BasicNewsRecipe):
category = 'news' category = 'news'
encoding = 'UTF-8' encoding = 'UTF-8'
keep_only_tags = [ keep_only_tags = [
dict(name='h1', attrs={'class': lambda x: x and 'headline' in x}), prefix_classes('articleHeader__title-- centerAligned__meta-- featuredMedia__imageWrapper-- articleBody__wrapper--'),
classes('lead-media longform-bylines longform-timestamps author'), classes('lead-media longform-bylines longform-timestamps author'),
dict(id=['article-body', 'longform-body']), dict(id=['article-body', 'longform-body']),
] ]
no_javascript = True no_javascript = True
@ -48,20 +61,18 @@ class Fortune(BasicNewsRecipe):
articles = [] articles = []
# Go to the latest issue # Go to the latest issue
soup = self.index_to_soup('http://fortune.com/section/magazine/') soup = self.index_to_soup('https://fortune.com/section/magazine/')
articles = [] articles = []
for i, article in enumerate(soup.findAll('article', attrs={'class': lambda x: x and 'type-article' in x.split()})): for li in soup.findAll('li', attrs={'class': lambda x: x and 'termArchiveContentList__item--' in x}):
div = article.find('div', attrs={'class': lambda x: x and 'article-info' in x.split()}) a = li.find('a', href=True)
a = div.find('a', href=True)
url = a['href'] url = a['href']
if url.startswith('/'): div = li.find(attrs={'class': lambda x: x and 'termArchiveContentListItem__title--' in x})
url = 'http://fortune.com' + url title = self.tag_to_string(div)
title = self.tag_to_string(a)
ai = div.find('div', attrs={'class': lambda x: x and 'article-info-extended' in x.split()})
desc = '' desc = ''
if ai: div = li.find(attrs={'class': lambda x: x and 'termArchiveContentListItem__excerpt--' in x})
desc = self.tag_to_string(desc) if div is not None:
self.log('Article:', title, 'at', url) desc = self.tag_to_string(div)
self.log(title, url)
articles.append({'title': title, 'url': url, 'description': desc}) articles.append({'title': title, 'url': url, 'description': desc})
return [('Articles', articles)] return [('Articles', articles)]