import re

from calibre.web.feeds.news import BasicNewsRecipe


class PhysicsWorld(BasicNewsRecipe):
    """Recipe for the Physics World headline news feed.

    The class is pure configuration plus a ``get_browser`` override that
    submits the site's sign-in form when a username and password are set.
    """

    # --- Publication metadata -------------------------------------------
    title = u'Physicsworld'
    __author__ = 'Hypernova'
    description = 'News from the world of physics'
    language = 'en'
    cover_url = 'http://images.iop.org/cws/icons/themes/phw/header-logo.png'

    # --- Download limits and account handling ---------------------------
    oldest_article = 7
    max_articles_per_feed = 100
    needs_subscription = True
    use_embedded_content = False

    # --- Page clean-up --------------------------------------------------
    no_stylesheets = True
    remove_javascript = True
    remove_tags_before = {'name': 'h1'}
    remove_tags_after = [{'name': 'div', 'attrs': {'id': 'shareThis'}}]

    # NOTE(review): as written, the pattern is a newline character followed
    # by ``.*`` compiled with DOTALL, so it matches from the first newline to
    # the end of the input and replaces that with the empty string. The
    # leading part of the pattern may have been lost from this copy of the
    # file -- confirm against the upstream recipe before relying on it.
    preprocess_regexps = [
        (
            re.compile('\n.*', re.IGNORECASE | re.DOTALL),
            lambda _match: '',
        ),
    ]

    # --- Feeds, as (title, url) pairs -----------------------------------
    feeds = [
        (u'Headlines News', u'http://feeds.feedburner.com/PhysicsWorldNews'),
    ]

    def get_browser(self):
        """Return the base recipe's browser, signed in when possible.

        The browser comes from ``BasicNewsRecipe.get_browser``. If either
        ``self.username`` or ``self.password`` is ``None`` it is returned
        untouched; otherwise the sign-in page is opened and its form is
        filled in and submitted first.
        """
        browser = BasicNewsRecipe.get_browser(self)

        # Without a complete set of credentials there is nothing to submit.
        if self.username is None or self.password is None:
            return browser

        browser.open('http://physicsworld.com/cws/sign-in')
        # The form is picked by its position on the page (nr=2); presumably
        # this is the login form -- verify if the page layout changes.
        browser.select_form(nr=2)
        browser['username'] = self.username
        browser['password'] = self.password
        browser.submit()
        return browser