diff --git a/recipes/natgeo.recipe b/recipes/natgeo.recipe
index 920cc92332..a22e5824b1 100644
--- a/recipes/natgeo.recipe
+++ b/recipes/natgeo.recipe
@@ -135,6 +135,11 @@ class NatGeo(BasicNewsRecipe):
         .auth, .time { font-size:small; color:#5c5c5c; }
     '''
 
+    def get_cover_url(self):
+        soup = self.index_to_soup('https://www.nationalgeographic.com/magazine/')
+        png = re.findall('https://i\.natgeofe\.com\S+?national-geographic-\S+?\.jpg', soup.decode('utf-8'))
+        return png[0] + '?w=1000&h=1000'
+
     def parse_index(self):
         pages = [
             'https://www.nationalgeographic.com/animals',
@@ -176,3 +181,9 @@ class NatGeo(BasicNewsRecipe):
             # for high res images use '?w=2000&h=2000'
             img['src'] = img['src'] + '?w=1000&h=1000'
         return soup
+
+    def populate_article_metadata(self, article, soup, first):
+        summ = soup.find(attrs={'class':'byline'})
+        if summ:
+            article.summary = self.tag_to_string(summ)
+            article.text_summary = self.tag_to_string(summ)
diff --git a/recipes/natgeohis.recipe b/recipes/natgeohis.recipe
index b5d74258b9..683a9e73b7 100644
--- a/recipes/natgeohis.recipe
+++ b/recipes/natgeohis.recipe
@@ -160,3 +160,9 @@ class NatGeo(BasicNewsRecipe):
             # for high res images use '?w=2000&h=2000'
             img['src'] = img['src'] + '?w=1000&h=1000'
         return soup
+
+    def populate_article_metadata(self, article, soup, first):
+        summ = soup.find(attrs={'class':'byline'})
+        if summ:
+            article.summary = self.tag_to_string(summ)
+            article.text_summary = self.tag_to_string(summ)
diff --git a/recipes/natgeomag.recipe b/recipes/natgeomag.recipe
index bec1e910c0..5b21fd54fd 100644
--- a/recipes/natgeomag.recipe
+++ b/recipes/natgeomag.recipe
@@ -143,7 +143,7 @@ class NatGeo(BasicNewsRecipe):
         self.log('Downloading ', url)
         self.timefmt = ' [' + edition + ']'
         soup = self.index_to_soup(url)
-        png = re.findall('https://i\.natgeofe\.com\S+?national-geographic-magazine-\S+?\.jpg', soup.decode('utf-8'))
+        png = re.findall('https://i\.natgeofe\.com\S+?national-geographic-\S+?\.jpg', soup.decode('utf-8'))
         self.cover_url = png[0] + '?w=1000&h=1000'
 
         name = soup.find(attrs={'class':lambda x: x and 'Header__Description' in x.split()})
@@ -179,3 +179,9 @@ class NatGeo(BasicNewsRecipe):
             # for high res images use '?w=2000&h=2000'
             img['src'] = img['src'] + '?w=1200&h=1200'
         return soup
+
+    def populate_article_metadata(self, article, soup, first):
+        summ = soup.find(attrs={'class':'byline'})
+        if summ:
+            article.summary = self.tag_to_string(summ)
+            article.text_summary = self.tag_to_string(summ)
diff --git a/recipes/theeconomictimes_india_print_edition.recipe b/recipes/theeconomictimes_india_print_edition.recipe
index 293eb63e0d..330fff007a 100644
--- a/recipes/theeconomictimes_india_print_edition.recipe
+++ b/recipes/theeconomictimes_india_print_edition.recipe
@@ -58,6 +58,10 @@ class TheEconomicTimes(BasicNewsRecipe):
     ]
 
     def parse_index(self):
+        self.log(
+            '\n***\nif this recipe fails, report it on: '
+            'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
+        )
         soup = self.index_to_soup(
             'https://economictimes.indiatimes.com/print_edition.cms'
         )
diff --git a/recipes/toiprint.recipe b/recipes/toiprint.recipe
index 9f3127b14e..cb2f42a40d 100644
--- a/recipes/toiprint.recipe
+++ b/recipes/toiprint.recipe
@@ -52,6 +52,10 @@ class toiprint(BasicNewsRecipe):
         return cover
 
     def parse_index(self):
+        self.log(
+            '\n***\nif this recipe fails, report it on: '
+            'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
+        )
         url = index + '/DayIndex/' + date_ + '_' + le + '.json'
         raw = self.index_to_soup(url, raw=True)
         data = json.loads(raw)
@@ -73,7 +77,7 @@ class toiprint(BasicNewsRecipe):
                 if 'ArticleName' not in art:
                     continue
                 url = art['ArticleName']
-                title = art.get('ArticleTitle', 'unknown').replace('<br>', '')
+                title = art.get('ArticleTitle', 'unknown').replace('<br>', '').replace('<br/>', '')
                 if art.get('ColumnTitle', '') == '':
                     desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
                 else: