__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic '
'''
lrb.co.uk
'''

from calibre import strftime
from calibre.web.feeds.news import BasicNewsRecipe


class LondonReviewOfBooksPayed(BasicNewsRecipe):
    """Recipe for the subscriber edition of the London Review of Books.

    Logs in with the configured username/password (when both are set),
    locates the current issue through the cover block on the site's front
    page, and builds a single feed from the article links on that issue's
    contents page.
    """

    title = 'London Review of Books'
    __author__ = 'Darko Miletic'
    description = 'Subscription content. Literary review publishing essay-length book reviews and topical articles on politics, literature, history, philosophy, science and the arts by leading writers and thinkers'
    category = 'news, literature, UK'
    publisher = 'LRB Ltd.'
    max_articles_per_feed = 100
    language = 'en_GB'
    no_stylesheets = True
    delay = 1
    use_embedded_content = False
    encoding = 'utf-8'
    # Site root; every relative href/src scraped below is appended to this.
    INDEX = 'http://www.lrb.co.uk'
    LOGIN = INDEX + '/login'
    masthead_url = INDEX + '/assets/images/lrb_logo_big.gif'
    needs_subscription = True
    publication_type = 'magazine'
    extra_css = ' body{font-family: Georgia,Palatino,"Palatino Linotype",serif} '

    def get_browser(self):
        """Return a browser, logged in when credentials are configured.

        The login form is submitted only if both ``self.username`` and
        ``self.password`` are not ``None``; otherwise the plain browser
        from the base class is returned.
        """
        # The base implementation is an instance method, so ``self`` has to
        # be passed explicitly when calling it through the class.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            # The login form is selected by position: second form (index 1).
            br.select_form(nr=1)
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    def parse_index(self):
        """Build the feed list for the current issue.

        Returns a one-element list ``[(feed_title, articles)]`` where each
        article is a dict with the keys ``title``, ``date``, ``url`` and
        ``description``. When the front page has no cover block, the feed
        title falls back to ``self.title`` and the article list is empty.
        As a side effect ``self.cover_url`` is set from the cover image.
        """
        articles = []
        soup = self.index_to_soup(self.INDEX)
        cover_item = soup.find('p', attrs={'class': 'cover'})
        lrbtitle = self.title
        if cover_item:
            self.cover_url = self.INDEX + cover_item.a.img['src']
            # The cover links to the contents page of the current issue.
            content = self.INDEX + cover_item.a['href']
            soup2 = self.index_to_soup(content)
            sitem = soup2.find(attrs={'class': 'article-list'})
            lrbtitle = soup2.head.title.string
            # Guard against an issue page without an article list instead
            # of failing with AttributeError on None.
            links = []
            if sitem is not None:
                links = sitem.findAll('a', attrs={'class': 'title'})
            # Same timestamp for every article; compute it once.
            date = strftime(self.timefmt)
            for feed_link in links:
                # Tag.get works on both old and new BeautifulSoup versions,
                # unlike the has_key method it replaces.
                href = feed_link.get('href')
                if href is None:
                    continue
                articles.append({
                    'title': self.tag_to_string(feed_link),
                    'date': date,
                    'url': self.INDEX + href,
                    'description': u'',
                })
        return [(lrbtitle, articles)]

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
    }

    # Keep only the article body (or letters page) container.
    keep_only_tags = [
        dict(name='div', attrs={'class': ['article-body indent', 'letters']})
    ]
    remove_attributes = ['width', 'height']