calibre/recipes/lwn.recipe
John Kristensen 5359ed3744 Fix lwn.net login
The field names on the LWN login pages have changed and the recipe needs
to be updated to use the new field names.
2024-12-08 21:35:48 +11:00

55 lines
1.5 KiB
Python

#!/usr/bin/env python
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
'''
lwn.net
'''
from calibre.web.feeds.news import BasicNewsRecipe
class LWN(BasicNewsRecipe):
    '''
    Recipe that downloads Linux Weekly News (lwn.net) from its headline feed.

    When both a username and a password are configured, the browser logs in
    through the LWN login form before content is fetched; otherwise the
    plain browser from BasicNewsRecipe is used.
    '''

    title = u'lwn'
    __author__ = 'Oliver Niesner'
    description = 'Linux Weekly News'
    oldest_article = 54
    max_articles_per_feed = 100
    needs_subscription = True
    # `language` used to be assigned twice: first to _('English') and then
    # to 'en'.  The first value was always overwritten, and it needed the
    # gettext builtin `_` to exist while the class body ran.  Only the
    # effective value is kept.
    language = 'en'
    remove_javascript = True
    # Fetch one resource at a time with a delay between requests.
    simultaneous_downloads = 1
    delay = 1
    # Page that hosts the form named 'loginform' used by get_browser().
    LOGIN = 'https://lwn.net/login'

    def get_browser(self):
        '''
        Return the browser used for downloading, logged in when possible.

        The login form is submitted only when both ``self.username`` and
        ``self.password`` are set; in every case the browser obtained from
        BasicNewsRecipe is returned.
        '''
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(self.LOGIN)
            br.select_form(name='loginform')
            # Field names of the login form.
            br['uname'] = self.username
            br['pword'] = self.password
            br.submit()
        return br

    # Page elements (matched by tag name and class) dropped from articles.
    remove_tags = [
        dict(name='td', attrs={'class': 'LeftColumn'}),
        dict(name='td', attrs={'class': 'NavLink'}),
        dict(name='div', attrs={'class': 'FormattedComment'}),
        dict(name='td', attrs={'class': 'MCTopBanner'}),
        dict(name='div', attrs={'class': 'CommentBox'}),
    ]

    # (title, url) pairs of the feeds to download.
    feeds = [
        (u'lwn', u'http://lwn.net/headlines/newrss'),
    ]

    def postprocess_html(self, soup, first):
        '''
        Rename every table, tr and td element in *soup* to div.

        :param soup: parsed article document; it is modified in place.
        :param first: not used by this recipe.
        :return: the same *soup* object.
        '''
        for tag in soup.findAll(name=['table', 'tr', 'td']):
            tag.name = 'div'
        return soup

    def print_version(self, url):
        '''
        Return *url* with each 'rss' substring replaced by
        '?format=printable'.

        :param url: article URL as a string.
        :return: the rewritten URL string.
        '''
        return url.replace('rss', '?format=printable')