from calibre.web.feeds.recipes import BasicNewsRecipe


class NYTimes(BasicNewsRecipe):
    """Recipe that builds an issue from the National Post "today's paper" index.

    NOTE(review): the class is called ``NYTimes`` although every setting
    below targets the National Post; the name is kept unchanged so anything
    referring to the class by name keeps working.
    """

    title = 'National Post'
    __author__ = 'Krittika Goyal'
    description = 'Canadian national newspaper'
    timefmt = ' [%d %b, %Y]'
    language = 'en_CA'
    needs_subscription = False

    no_stylesheets = True
    auto_cleanup = True
    # XPath of elements to keep during auto cleanup (story photo blocks,
    # judging by the class name).
    auto_cleanup_keep = '//*[@class="npStoryPhoto npTxtPlain"]'

    def nejm_get_index(self):
        """Return the parsed index page that lists today's articles."""
        return self.index_to_soup('http://www.nationalpost.com/todays-paper/index.html')

    def parse_index(self):
        """Build the list of feeds from the table-of-contents page.

        Walks every tag inside the element with id ``npContentMain``:
        an ``h4`` starts a new section and each following ``h5`` that
        contains a link contributes one article to the current section.

        Returns:
            A list of ``(section_title, articles)`` tuples, where
            ``articles`` is a list of dicts with the keys ``title``,
            ``url``, ``description`` and ``date``. Sections without any
            article are omitted.

        Raises:
            ValueError: If the index page has no ``npContentMain`` element.
        """
        soup = self.nejm_get_index()

        container = soup.find(id='npContentMain')
        if container is None:
            # Without this guard a missing container surfaces as an opaque
            # AttributeError on NoneType a few lines further down.
            raise ValueError(
                'National Post index page has no element with id '
                '"npContentMain"')

        current_section = None
        current_articles = []
        feeds = []
        for tag in container.findAll(True):
            if tag.name == 'h4':
                # Section heading: flush the section collected so far
                # before starting the next one.
                if current_articles and current_section:
                    feeds.append((current_section, current_articles))
                current_section = self.tag_to_string(tag)
                current_articles = []
                self.log('\tFound section:', current_section)
            elif current_section is not None and tag.name == 'h5':
                # Article heading; only counted once a section has been seen.
                title = self.tag_to_string(tag)
                link = tag.find('a', href=True)
                if link is None:
                    continue
                url = link.get('href', False)
                if not url or not title:
                    continue
                # The href is used exactly as found on the page; no base
                # URL is prepended.
                self.log('\t\tFound article:', title)
                self.log('\t\t\t', url)
                current_articles.append({
                    'title': title,
                    'url': url,
                    'description': '',
                    'date': '',
                })

        # Flush the final section, which has no following h4 to trigger it.
        if current_articles and current_section:
            feeds.append((current_section, current_articles))

        return feeds