From 306959fd4aa546f8b8d989e6942f22ea4d5dfcf0 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 23 Feb 2019 08:56:19 +0530
Subject: [PATCH] Update General Knowledge Today

Fixes #1817256 [Unable to fetch "General Knowledge today"](https://bugs.launchpad.net/calibre/+bug/1817256)
---
 recipes/gkt.recipe | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/recipes/gkt.recipe b/recipes/gkt.recipe
index 6da219cb3a..a5e2d24054 100644
--- a/recipes/gkt.recipe
+++ b/recipes/gkt.recipe
@@ -1,7 +1,7 @@
 from calibre.web.feeds.news import BasicNewsRecipe
 
 
-class Politics(BasicNewsRecipe):
+class GKT(BasicNewsRecipe):
     title = u'General Knowledge Today'
     language = 'en_IN'
     __author__ = 'Kovid Goyal'
@@ -12,8 +12,13 @@ class Politics(BasicNewsRecipe):
     no_javascript = True
     auto_cleanup = True
 
-    def parse_gkt_section(self, url):
-        root = self.index_to_soup(url, as_tree=True)
+    def parse_gkt_section(self, url, ignore_error=False):
+        try:
+            root = self.index_to_soup(url, as_tree=True)
+        except Exception:
+            if ignore_error:
+                return
+            raise
         for a in root.xpath('//div[@class="post-content"]/h1/a[@href]'):
             title = self.tag_to_string(a).strip()
             url = a.get('href')
@@ -28,12 +33,12 @@ class Politics(BasicNewsRecipe):
         h3 = root.xpath('//h3[@class="widget-title"]')[1]
         for a in h3.getparent().xpath('descendant::li/a[@href]'):
             category = self.tag_to_string(a).strip()
-            if 'PDF' in category:
+            if 'PDF' in category or not category:
                 continue
             url = a.get('href')
             self.log('Found section:', category)
             articles = list(self.parse_gkt_section(url)) + \
-                list(self.parse_gkt_section(url + '/page/2'))
+                list(self.parse_gkt_section(url + '/page/2', ignore_error=True))
             if articles:
                 ans.append((category, articles))
         return ans