#!/usr/bin/env python

__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal '
'''
lwn.net
'''

from calibre.web.feeds.news import BasicNewsRecipe


class LWN(BasicNewsRecipe):
    """Calibre news recipe for Linux Weekly News (lwn.net).

    Logs in with the configured username/password (when both are set),
    fetches the headline feed, strips navigation and comment markup, and
    rewrites table markup into plain ``div`` elements.
    """

    # --- Recipe metadata -------------------------------------------------
    title = u'lwn'
    __author__ = 'Oliver Niesner'
    description = 'Linux Weekly News'
    # The original file assigned ``language`` twice (``_('English')`` and
    # then ``'en'``); only the final value ever took effect, so only that
    # one is kept.
    language = 'en'

    # --- Download options ------------------------------------------------
    oldest_article = 54  # presumably a maximum article age in days
    max_articles_per_feed = 100
    needs_subscription = True
    remove_javascript = True
    # One download at a time with a delay of 1 between requests
    # (presumably seconds) -- NOTE(review): looks intended to be gentle on
    # the server; confirm before raising.
    simultaneous_downloads = 1
    delay = 1

    # Page that carries the login form used by get_browser().
    LOGIN = 'https://lwn.net/login'

    # Elements dropped from every downloaded page: side column, navigation
    # links, the top banner, and reader comments.
    remove_tags = [
        dict(name='td', attrs={'class': 'LeftColumn'}),
        dict(name='td', attrs={'class': 'NavLink'}),
        dict(name='div', attrs={'class': 'FormattedComment'}),
        dict(name='td', attrs={'class': 'MCTopBanner'}),
        dict(name='div', attrs={'class': 'CommentBox'}),
    ]

    feeds = [
        (u'lwn', u'http://lwn.net/headlines/newrss'),
    ]

    def get_browser(self):
        """Return the base recipe browser, logged in when credentials exist.

        When both ``self.username`` and ``self.password`` are set, the
        login page is opened, the form named ``loginform`` is filled in
        and submitted. Otherwise the browser is returned untouched.
        """
        # The base implementation must receive the instance explicitly when
        # it is called through the class; calling it with no argument fails
        # where get_browser is an instance method.
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(name='loginform')
            br['Username'] = self.username
            br['Password'] = self.password
            br.submit()
        return br

    def postprocess_html(self, soup, first):
        """Rename every ``table``/``tr``/``td`` element to ``div``.

        ``soup`` is modified in place and also returned. ``first`` is
        accepted for interface compatibility and is not used here.
        """
        # Presumably flattens the table-based page layout so it reflows
        # on small screens -- only the tag names change, content is kept.
        for tag in soup.findAll(name=['table', 'tr', 'td']):
            tag.name = 'div'
        return soup

    def print_version(self, url):
        """Return the printable-format URL for the article at ``url``.

        The last occurrence of ``'rss'`` in ``url`` is replaced by
        ``'?format=printable'``. A URL that does not contain ``'rss'`` is
        returned unchanged.
        """
        # Only the final 'rss' is rewritten so that an 'rss' appearing
        # earlier in the URL (host name, path segment) is left intact.
        head, marker, tail = url.rpartition('rss')
        if not marker:
            return url
        return head + '?format=printable' + tail