mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fortune Magazine by Rick Shang
This commit is contained in:
parent
ca5629beaa
commit
0335a44424
75
recipes/fortune_magazine.recipe
Normal file
75
recipes/fortune_magazine.recipe
Normal file
@ -0,0 +1,75 @@
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
from collections import OrderedDict
|
||||
|
||||
class Fortune(BasicNewsRecipe):
|
||||
|
||||
title = 'Fortune Magazine'
|
||||
__author__ = 'Rick Shang'
|
||||
|
||||
description = 'FORTUNE is a global business magazine that has been revered in its content and credibility since 1930. FORTUNE covers the entire field of business, including specific companies and business trends, prominent business leaders, and new ideas shaping the global marketplace.'
|
||||
language = 'en'
|
||||
category = 'news'
|
||||
encoding = 'UTF-8'
|
||||
keep_only_tags = [dict(attrs={'id':['storycontent']})]
|
||||
remove_tags = [dict(attrs={'class':['hed_side','socialMediaToolbarContainer']})]
|
||||
no_javascript = True
|
||||
no_stylesheets = True
|
||||
needs_subscription = True
|
||||
|
||||
def get_browser(self):
|
||||
br = BasicNewsRecipe.get_browser(self)
|
||||
br.open('http://money.cnn.com/2013/03/21/smallbusiness/legal-marijuana-startups.pr.fortune/index.html')
|
||||
br.select_form(name="paywall-form")
|
||||
br['email'] = self.username
|
||||
br['password'] = self.password
|
||||
br.submit()
|
||||
return br
|
||||
|
||||
def parse_index(self):
|
||||
articles = []
|
||||
soup0 = self.index_to_soup('http://money.cnn.com/magazines/fortune/')
|
||||
|
||||
#Go to the latestissue
|
||||
soup = self.index_to_soup(soup0.find('div',attrs={'class':'latestissue'}).find('a',href=True)['href'])
|
||||
|
||||
#Find cover & date
|
||||
cover_item = soup.find('div', attrs={'id':'cover-story'})
|
||||
cover = cover_item.find('img',src=True)
|
||||
self.cover_url = cover['src']
|
||||
date = self.tag_to_string(cover_item.find('div', attrs={'class':'tocDate'})).strip()
|
||||
self.timefmt = u' [%s]'%date
|
||||
|
||||
|
||||
feeds = OrderedDict()
|
||||
section_title = ''
|
||||
|
||||
#checkout the cover story
|
||||
articles = []
|
||||
coverstory=soup.find('div', attrs={'class':'cnnHeadline'})
|
||||
title=self.tag_to_string(coverstory.a).strip()
|
||||
url=coverstory.a['href']
|
||||
desc=self.tag_to_string(coverstory.findNext('p', attrs={'class':'cnnBlurbTxt'}))
|
||||
articles.append({'title':title, 'url':url, 'description':desc, 'date':''})
|
||||
feeds['Cover Story'] = []
|
||||
feeds['Cover Story'] += articles
|
||||
|
||||
for post in soup.findAll('div', attrs={'class':'cnnheader'}):
|
||||
section_title = self.tag_to_string(post).strip()
|
||||
articles = []
|
||||
|
||||
ul=post.findNext('ul')
|
||||
for link in ul.findAll('li'):
|
||||
links=link.find('h2')
|
||||
title=self.tag_to_string(links.a).strip()
|
||||
url=links.a['href']
|
||||
desc=self.tag_to_string(link.find('p', attrs={'class':'cnnBlurbTxt'}))
|
||||
articles.append({'title':title, 'url':url, 'description':desc, 'date':''})
|
||||
|
||||
if articles:
|
||||
if section_title not in feeds:
|
||||
feeds[section_title] = []
|
||||
feeds[section_title] += articles
|
||||
|
||||
ans = [(key, val) for key, val in feeds.iteritems()]
|
||||
return ans
|
||||
|
Loading…
x
Reference in New Issue
Block a user