mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
68 lines
2.5 KiB
Plaintext
68 lines
2.5 KiB
Plaintext
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
|
|
|
|
def classes(classes):
    """Build a tag-matching spec that accepts any of the given class names.

    ``classes`` is a string of class names separated by single spaces.
    The result is a dict with one ``attrs`` entry whose ``class`` value is
    a predicate: given a tag's class attribute string, it returns the
    (possibly empty) set of names shared with ``classes``, or the falsy
    input itself when the attribute is missing or empty.
    """
    wanted = frozenset(classes.split(' '))

    def shares_wanted_class(value):
        # Short-circuit on None / '' so tags without a class never match.
        return value and frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': shares_wanted_class}}
|
|
|
|
|
|
class Fortune(BasicNewsRecipe):
    """Recipe that collects articles listed on fortune.com's magazine section."""

    title = 'Fortune Magazine'
    __author__ = 'Rick Shang'

    description = 'FORTUNE is a global business magazine that has been revered in its content and credibility since 1930. FORTUNE covers the entire field of business, including specific companies and business trends, prominent business leaders, and new ideas shaping the global marketplace.'  # noqa
    language = 'en'
    category = 'news'
    encoding = 'UTF-8'
    # Parts of an article page to keep: the headline, the lead media /
    # byline / timestamp / author elements, and the article body itself.
    keep_only_tags = [
        dict(name='h1', attrs={'class': lambda x: x and 'headline' in x}),
        classes('lead-media longform-bylines longform-timestamps author'),
        dict(id=['article-body', 'longform-body']),
    ]

    no_javascript = True
    no_stylesheets = True
    # Login is only attempted when both a username and a password are set
    # (see get_browser).
    needs_subscription = 'optional'

    def get_browser(self):
        """Return the base recipe browser, signed in when credentials exist.

        When both ``self.username`` and ``self.password`` are set, the
        sitemap page is opened and its ``sign-in-form`` form is filled in
        and submitted before the browser is returned.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username and self.password:
            br.open('http://fortune.com/sitemap/')
            br.select_form(id='sign-in-form')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def preprocess_html(self, soup, *a):
        """Turn lazy-image placeholders into real ``<img>`` tags.

        For every element carrying the ``lazy-image`` class, the first
        descendant ``<a>`` with an ``href`` is renamed to ``img`` and its
        ``src`` is set to that ``href``. Returns the same ``soup``.
        """
        for div in soup.findAll(attrs={'class': lambda x: x and 'lazy-image' in x.split()}):
            # Named ``link`` so it does not shadow the ``*a`` parameter.
            link = div.find('a', href=True)
            if link is not None:
                link.name = 'img'
                link['src'] = link['href']
        return soup

    def parse_index(self):
        """Build the article list from the magazine section page.

        Returns a one-element list ``[('Articles', articles)]`` where each
        article is a dict with ``title``, ``url`` and ``description`` keys.
        Entries that lack an ``article-info`` block or a link inside it are
        logged and skipped instead of aborting the whole index.
        """
        # Go to the latest issue
        soup = self.index_to_soup('http://fortune.com/section/magazine/')
        articles = []

        for article in soup.findAll('article', attrs={'class': lambda x: x and 'type-article' in x.split()}):
            div = article.find('div', attrs={'class': lambda x: x and 'article-info' in x.split()})
            a = div.find('a', href=True) if div is not None else None
            if a is None:
                # Without a link there is no URL or title to record.
                self.log('Skipping article entry without a link')
                continue
            url = a['href']
            if url.startswith('/'):
                # Site-relative link: make it absolute.
                url = 'http://fortune.com' + url
            title = self.tag_to_string(a)
            ai = div.find('div', attrs={'class': lambda x: x and 'article-info-extended' in x.split()})
            # The description is the text of the extended-info block, when
            # the entry has one.
            desc = self.tag_to_string(ai) if ai is not None else ''
            self.log('Article:', title, 'at', url)
            articles.append({'title': title, 'url': url, 'description': desc})
        return [('Articles', articles)]
|