from calibre.web.feeds.news import BasicNewsRecipe, classes


class GKT(BasicNewsRecipe):
    """Recipe for the current-affairs listing on gktoday.in.

    The index is built by scraping the paginated listing pages; articles
    are grouped into feeds by the first ``rel="tag"`` link found in the
    paragraphs that follow each headline.
    """

    title = u'General Knowledge Today'
    language = 'en_IN'
    __author__ = 'Kovid Goyal'
    oldest_article = 7  # days
    max_articles_per_feed = 20

    no_stylesheets = True
    no_javascript = True
    auto_cleanup = True

    def parse_index(self):
        """Scrape the listing pages and return the articles grouped by category.

        Returns:
            A list of ``(category, articles)`` tuples, where ``articles`` is a
            list of dicts with the keys ``'title'``, ``'url'`` and
            ``'description'``. Headlines without a category link are filed
            under ``'Unknown'``.
        """
        securl = 'https://www.gktoday.in/current-affairs/'
        ans = {}

        def p_tags(h1):
            """Yield the <p> siblings after *h1*, stopping at the next <h1>."""
            for sib in h1.next_siblings:
                if sib.name == 'h1':
                    break
                if sib.name == 'p':
                    yield sib

        def find_cat(ps):
            """Return the text of the first rel="tag" link in *ps*, or None."""
            for p in ps:
                for a in p.findAll('a', rel='tag'):
                    return self.tag_to_string(a)
            return None

        # The first listing page is the bare URL; pages 2-5 live under page/<n>.
        for i in range(1, 6):
            page = '' if i == 1 else 'page/' + str(i)
            self.log('Trying:', securl + page)
            soup = self.index_to_soup(securl + page)
            container = soup.find(**classes('left_middle_content'))
            if container is None:
                # Without the listing container there is nothing to scrape on
                # this page; skip it rather than abort the whole index.
                self.log('\tNo article container found, skipping page')
                continue
            for h1 in container.findAll('h1'):
                a = h1.find('a')
                if a is None:
                    continue
                url = a.get('href')
                if not url:
                    # A headline anchor without a target cannot be downloaded.
                    continue
                title = self.tag_to_string(h1)
                ps = tuple(p_tags(h1))
                category = find_cat(ps) or 'Unknown'
                # A headline may have no following paragraph at all; fall back
                # to an empty description instead of indexing an empty tuple.
                description = self.tag_to_string(ps[0]) if ps else ''
                ans.setdefault(category, []).append({
                    'title': title,
                    'url': url,
                    'description': description,
                })
                self.log('\t' + title + ' ' + url)
        return list(ans.items())