calibre/recipes/prospectmaguk.recipe

#!/usr/bin/env python
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'

__license__ = 'GPL v3'

'''
calibre recipe for prospectmagazine.co.uk (subscription)
'''

from calibre.web.feeds.recipes import BasicNewsRecipe
from css_selectors import Select


class ProspectMagUK(BasicNewsRecipe):
    """Recipe that builds an issue of Prospect Magazine from its issue page.

    Sections and their articles are scraped from ``INDEX``; when credentials
    are supplied the browser logs in through the site's login form first.
    """

    title = u'Prospect Magazine'
    description = 'A general-interest publication offering analysis and commentary about politics, news and business.'
    __author__ = 'barty, duluoz'
    timefmt = ' [%d %B %Y]'
    no_stylesheets = True
    publication_type = 'magazine'
    category = 'news, UK'
    language = 'en_GB'
    max_articles_per_feed = 100
    needs_subscription = True

    # Page that parse_index() scrapes for the cover, sections and articles.
    INDEX = 'http://www.prospectmagazine.co.uk/issue/'
    keep_only_tags = [dict(id='post_content')]

    def get_browser(self):
        """Return a browser, logged in when both credentials are set.

        The login form named ``loginform`` on ``wp-login.php`` is filled
        with ``self.username`` / ``self.password`` and submitted. If either
        credential is ``None`` the browser is returned without logging in.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open('http://www.prospectmagazine.co.uk/wp-login.php')
            br.select_form(name='loginform')
            br['log'] = self.username
            br['pwd'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Scrape the issue page into a list of feeds.

        Also sets ``self.cover_url`` from the cover image, if one is found.

        Returns:
            A list of ``(section_title, articles)`` tuples, where
            ``articles`` is a list of dicts with ``'title'`` and ``'url'``
            keys. Sections without any article are omitted.
        """
        root = self.index_to_soup(self.INDEX, as_tree=True)
        sel = Select(root)
        # Only match images that actually carry a src attribute (mirrors the
        # a[href] guard below); an <img> without src would otherwise make
        # .get('src') return None and crash on .partition().
        for img in sel('.block_this_month .img_wrap img[src]'):
            # Drop any query string from the image URL.
            self.cover_url = img.get('src').partition('?')[0]
        feeds = []
        for h2 in sel('h2.block-title'):
            current_section = self.tag_to_string(h2)
            articles = []
            self.log('Found section:', current_section)
            # Articles are looked up within the heading's parent element.
            for div in sel('div.block_home_post', h2.getparent()):
                for a in sel('div.title a[href]', div):
                    articles.append({'title':self.tag_to_string(a), 'url':a.get('href')})
                    self.log('\tFound article:', articles[-1]['title'])
            if articles:
                feeds.append((current_section, articles))
        return feeds