mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 18:24:30 -04:00
Update Fortune Magazine
This commit is contained in:
parent
749460cbfc
commit
25e3c0a3ba
@ -7,6 +7,19 @@ def classes(classes):
|
||||
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
def prefix_classes(classes):
    """Build a tag-matching spec keyed on CSS class *prefixes*.

    ``classes`` is a whitespace-separated string of prefixes. The returned
    dict has the form ``{'attrs': {'class': predicate}}``, where
    ``predicate(value)`` returns True when at least one whitespace-separated
    token of ``value`` starts with one of the prefixes, and False otherwise
    (including when ``value`` is None or empty).
    """
    # str.startswith accepts a tuple, so all prefixes are checked in one call.
    prefixes = tuple(classes.split())

    def test(x):
        # A missing or empty class attribute can never match.
        if not x:
            return False
        return any(cls.startswith(prefixes) for cls in x.split())

    return dict(attrs={'class': test})
|
||||
|
||||
|
||||
class Fortune(BasicNewsRecipe):
|
||||
|
||||
title = 'Fortune Magazine'
|
||||
@ -17,7 +30,7 @@ class Fortune(BasicNewsRecipe):
|
||||
category = 'news'
|
||||
encoding = 'UTF-8'
|
||||
keep_only_tags = [
|
||||
dict(name='h1', attrs={'class': lambda x: x and 'headline' in x}),
|
||||
prefix_classes('articleHeader__title-- centerAligned__meta-- featuredMedia__imageWrapper-- articleBody__wrapper--'),
|
||||
classes('lead-media longform-bylines longform-timestamps author'),
|
||||
dict(id=['article-body', 'longform-body']),
|
||||
]
|
||||
@ -48,20 +61,18 @@ class Fortune(BasicNewsRecipe):
|
||||
articles = []
|
||||
|
||||
# Go to the latest issue
|
||||
soup = self.index_to_soup('http://fortune.com/section/magazine/')
|
||||
soup = self.index_to_soup('https://fortune.com/section/magazine/')
|
||||
articles = []
|
||||
|
||||
for i, article in enumerate(soup.findAll('article', attrs={'class': lambda x: x and 'type-article' in x.split()})):
|
||||
div = article.find('div', attrs={'class': lambda x: x and 'article-info' in x.split()})
|
||||
a = div.find('a', href=True)
|
||||
for li in soup.findAll('li', attrs={'class': lambda x: x and 'termArchiveContentList__item--' in x}):
|
||||
a = li.find('a', href=True)
|
||||
url = a['href']
|
||||
if url.startswith('/'):
|
||||
url = 'http://fortune.com' + url
|
||||
title = self.tag_to_string(a)
|
||||
ai = div.find('div', attrs={'class': lambda x: x and 'article-info-extended' in x.split()})
|
||||
div = li.find(attrs={'class': lambda x: x and 'termArchiveContentListItem__title--' in x})
|
||||
title = self.tag_to_string(div)
|
||||
desc = ''
|
||||
if ai:
|
||||
desc = self.tag_to_string(desc)
|
||||
self.log('Article:', title, 'at', url)
|
||||
div = li.find(attrs={'class': lambda x: x and 'termArchiveContentListItem__excerpt--' in x})
|
||||
if div is not None:
|
||||
desc = self.tag_to_string(div)
|
||||
self.log(title, url)
|
||||
articles.append({'title': title, 'url': url, 'description': desc})
|
||||
return [('Articles', articles)]
|
||||
|
Loading…
x
Reference in New Issue
Block a user