# calibre/recipes/phillosophy_now.recipe

import re
from calibre.web.feeds.recipes import BasicNewsRecipe
from collections import OrderedDict


class PhilosophyNow(BasicNewsRecipe):
    """Calibre recipe that downloads the current issue of Philosophy Now.

    The recipe logs in with the subscriber's credentials, reads the cover
    block on the site's home page to work out the current issue number, and
    then builds the feed list from that issue's contents page.
    """

    title = 'Philosophy Now'
    __author__ = 'Rick Shang'
    description = '''Philosophy Now is a lively magazine for everyone
    interested in ideas. It isn't afraid to tackle all the major questions of
    life, the universe and everything. Published every two months, it tries to
    corrupt innocent citizens by convincing them that philosophy can be
    exciting, worthwhile and comprehensible, and also to provide some enjoyable
    reading matter for those already ensnared by the muse, such as philosophy
    students and academics.'''
    language = 'en'
    category = 'news'
    encoding = 'UTF-8'

    keep_only_tags = [dict(attrs={'id': 'fullMainColumn'})]
    remove_tags = [dict(attrs={'class': 'articleTools'})]
    no_javascript = True
    no_stylesheets = True
    needs_subscription = True

    # Single site root so that the login, index, article, cover and logout
    # requests all go to the same scheme and host as the login form.
    BASE_URL = 'https://philosophynow.org'

    def get_browser(self):
        """Return a browser that has submitted the site's login form.

        The form named ``loginForm`` on the login page is filled in with
        ``self.username`` and ``self.password`` and submitted.
        """
        br = BasicNewsRecipe.get_browser(self)
        br.open(self.BASE_URL + '/auth/login')
        br.select_form(name="loginForm")
        br['username'] = self.username
        br['password'] = self.password
        br.submit()
        return br

    def parse_index(self):
        """Build the feed list for the current issue.

        As a side effect this sets ``self.timefmt`` (from the heading in the
        cover block) and ``self.cover_url`` (the large variant of the cover
        image shown on the home page).

        Returns:
            A list of ``(section_title, articles)`` tuples, in the order in
            which each section is first encountered on the contents page.
            Every article is a dict with the keys ``title``, ``url``,
            ``description`` and ``date`` (``date`` is always empty).
        """
        # Go to the issue
        soup0 = self.index_to_soup(self.BASE_URL + '/')
        issue = soup0.find('div', attrs={'id': 'navColumn'})

        # Find date & cover
        cover = issue.find('div', attrs={'id': 'cover'})
        date = self.tag_to_string(cover.find('h3')).strip()
        self.timefmt = u' [%s]' % date
        img = cover.find('img', src=True)['src']
        # The home page links the "medium" cover; request the "large" one.
        self.cover_url = self.BASE_URL + img.replace('medium', 'large')
        # The cover path is expected to look like
        # /media/images/covers/medium/issue<N>.jpg; strip both ends to get N.
        issuenum = img.replace('/media/images/covers/medium/issue', '')
        # Escape the dot and anchor at the end so only a literal ".jpg"
        # extension is removed, not any "<char>jpg" sequence.
        issuenum = re.sub(r'\.jpg$', '', issuenum)

        # Go to the main body
        current_issue_url = self.BASE_URL + '/issues/' + issuenum
        soup = self.index_to_soup(current_issue_url)
        div = soup.find('div', attrs={'class': 'contentsColumn'})

        feeds = OrderedDict()

        for post in div.findAll('h1'):
            a = post.find('a', href=True)
            if a is None:
                # Heading without a link is not an article entry.
                continue
            url = self.BASE_URL + a['href']
            title = self.tag_to_string(a).strip()
            # The closest preceding <h3> names the section the article is in.
            section_title = self.tag_to_string(
                post.findPrevious('h3')).strip()
            # The next <h2> after the headline is used as the description.
            desc = self.tag_to_string(post.findNext('h2')).strip()
            feeds.setdefault(section_title, []).append(
                {'title': title, 'url': url,
                 'description': desc, 'date': ''})
        return list(feeds.items())

    def cleanup(self):
        """Log out of the site by requesting its logout URL."""
        self.browser.open(self.BASE_URL + '/auth/logout')