import re

from calibre.web.feeds.news import BasicNewsRecipe
from calibre import strftime


class Physicstoday(BasicNewsRecipe):
    """Calibre recipe that downloads Physics Today from its RSS feed.

    The recipe is flagged as needing a subscription; when both a username
    and a password are configured, ``get_browser`` submits them to the
    publisher's login form before any article is fetched.
    """

    # --- Metadata -----------------------------------------------------
    title = u'Physicstoday'
    __author__ = 'Hypernova'
    description = u'Physics Today magazine'
    publisher = 'American Institute of Physics'
    category = 'Physics'
    language = 'en'

    # The cover image URL embeds month and year; strftime fills in the
    # %m / %Y placeholders when the class body is evaluated.
    cover_url = strftime(
        'http://ptonline.aip.org/journals/doc/PHTOAD-home/jrnls/images/medcover%m_%Y.jpg')

    # --- Fetch options ------------------------------------------------
    oldest_article = 30
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    needs_subscription = True
    remove_javascript = True

    # --- Clean-up rules -----------------------------------------------
    remove_tags_before = dict(name='h1')
    # Both highslide wrappers are listed in ONE list. Previously
    # ``remove_tags`` was assigned twice in a row, so the second assignment
    # silently replaced the first and the footer rule never took effect.
    remove_tags = [
        dict(name='div', attrs={'class': 'highslide-footer'}),
        dict(name='div', attrs={'class': 'highslide-header'}),
    ]

    # NOTE(review): as written, r'.*' combined with re.DOTALL matches the
    # entire input, so this substitution replaces everything it is applied
    # to with an empty string. The pattern looks like it lost its original
    # anchor text at some point -- confirm the intended expression before
    # relying on this rule. Left unchanged here on purpose.
    preprocess_regexps = [
        (re.compile(r'.*', re.DOTALL | re.IGNORECASE), lambda match: ''),
    ]

    def get_browser(self):
        """Return a browser, logged in when credentials are available.

        The base-class browser is obtained first. If both ``self.username``
        and ``self.password`` are set, the login page is opened, the form
        named ``login_form`` is filled in and submitted. The browser is
        returned in either case.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username is not None and self.password is not None:
            br.open(
                'http://ptonline.aip.org/journals/doc/PHTOAD-home/pt_login.jsp?fl=f')
            br.select_form(name='login_form')
            br['username'] = self.username
            br['password'] = self.password
            br.submit()
        return br

    # --- Feeds ----------------------------------------------------------
    feeds = [(u'All', u'http://www.physicstoday.org/feed.xml')]