# calibre/recipes/fortune_magazine.recipe

from calibre.web.feeds.recipes import BasicNewsRecipe
from collections import OrderedDict


class Fortune(BasicNewsRecipe):
    """Recipe that fetches the latest issue of Fortune Magazine.

    The recipe signs in through the paywall form served on money.cnn.com
    and builds its sections from the table of contents of the issue linked
    from the magazine landing page.
    """

    title = 'Fortune Magazine'
    __author__ = 'Rick Shang'

    description = 'FORTUNE is a global business magazine that has been revered in its content and credibility since 1930. FORTUNE covers the entire field of business, including specific companies and business trends, prominent business leaders, and new ideas shaping the global marketplace.'  # noqa
    language = 'en'
    category = 'news'
    encoding = 'UTF-8'
    # Only the element with id "storycontent" is kept from each article page.
    keep_only_tags = [dict(attrs={'id': ['storycontent']})]
    # Elements carrying these classes are stripped from the kept content.
    remove_tags = [
        dict(attrs={'class': ['hed_side', 'socialMediaToolbarContainer']})]
    no_javascript = True
    no_stylesheets = True
    needs_subscription = True

    def get_browser(self):
        """Return a browser that has submitted the paywall login form.

        The login form is reached by opening a fixed article URL; the
        form named "paywall-form" on that page is filled in with the
        recipe's username and password and then submitted.
        """
        br = BasicNewsRecipe.get_browser(self)
        br.open(
            'http://money.cnn.com/2013/03/21/smallbusiness/legal-marijuana-startups.pr.fortune/index.html')
        br.select_form(name="paywall-form")
        br['email'] = self.username
        br['password'] = self.password
        br.submit()
        return br

    def _make_article(self, anchor, blurb):
        """Build one article dict from a headline link and its blurb tag.

        Returns None when ``anchor`` is missing or has no ``href``, so that
        callers can skip malformed entries instead of failing.
        """
        if anchor is None:
            return None
        url = anchor.get('href')
        if not url:
            return None
        return {
            'title': self.tag_to_string(anchor).strip(),
            'url': url,
            'description': self.tag_to_string(blurb),
            'date': '',
        }

    def parse_index(self):
        """Build the feed list for the latest issue.

        Also sets ``self.cover_url`` and ``self.timefmt`` when the cover
        image and the issue date are present on the issue page.

        Returns:
            A list of ``(section title, articles)`` tuples in page order.
            Each article is a dict with the keys ``title``, ``url``,
            ``description`` and ``date``.
        """
        soup0 = self.index_to_soup('http://money.cnn.com/magazines/fortune/')

        # Go to the latest issue. Nothing can be built without it, so a
        # missing link is deliberately left to raise.
        soup = self.index_to_soup(soup0.find(
            'div', attrs={'class': 'latestissue'}).find('a', href=True)['href'])

        # Find cover & date. Both are cosmetic, so a missing element must
        # not abort the whole download.
        cover_item = soup.find('div', attrs={'id': 'cover-story'})
        if cover_item is not None:
            cover = cover_item.find('img', src=True)
            if cover is not None:
                self.cover_url = cover['src']
            date_tag = cover_item.find('div', attrs={'class': 'tocDate'})
            if date_tag is not None:
                date = self.tag_to_string(date_tag).strip()
                self.timefmt = u' [%s]' % date

        # Ordered so that sections come out in the order they appear.
        feeds = OrderedDict()

        # Check out the cover story
        coverstory = soup.find('div', attrs={'class': 'cnnHeadline'})
        if coverstory is not None:
            article = self._make_article(
                coverstory.a,
                coverstory.findNext('p', attrs={'class': 'cnnBlurbTxt'}))
            if article is not None:
                feeds['Cover Story'] = [article]

        for post in soup.findAll('div', attrs={'class': 'cnnheader'}):
            section_title = self.tag_to_string(post).strip()

            ul = post.findNext('ul')
            if ul is None:
                continue

            articles = []
            for item in ul.findAll('li'):
                heading = item.find('h2')
                anchor = heading.a if heading is not None else None
                article = self._make_article(
                    anchor, item.find('p', attrs={'class': 'cnnBlurbTxt'}))
                # Skip list items that do not carry a headline link.
                if article is not None:
                    articles.append(article)

            # Sections sharing a title are merged; empty ones are dropped.
            if articles:
                feeds.setdefault(section_title, []).extend(articles)

        # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
        return list(feeds.items())