Kovid Goyal 2023-11-11 11:48:50 +05:30
commit d266fe5447
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
5 changed files with 33 additions and 2 deletions

View File

@@ -135,6 +135,11 @@ class NatGeo(BasicNewsRecipe):
         .auth, .time { font-size:small; color:#5c5c5c; }
     '''

+    def get_cover_url(self):
+        soup = self.index_to_soup('https://www.nationalgeographic.com/magazine/')
+        png = re.findall('https://i\.natgeofe\.com\S+?national-geographic-\S+?\.jpg', soup.decode('utf-8'))
+        return png[0] + '?w=1000&h=1000'
+
     def parse_index(self):
         pages = [
             'https://www.nationalgeographic.com/animals',
@@ -176,3 +181,9 @@ class NatGeo(BasicNewsRecipe):
             # for high res images use '?w=2000&h=2000'
             img['src'] = img['src'] + '?w=1000&h=1000'
         return soup
+
+    def populate_article_metadata(self, article, soup, first):
+        summ = soup.find(attrs={'class':'byline'})
+        if summ:
+            article.summary = self.tag_to_string(summ)
+            article.text_summary = self.tag_to_string(summ)
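
Note: populate_article_metadata is the BasicNewsRecipe hook that calibre invokes once per downloaded article, after the article HTML has been processed; the lines added above copy the byline text into both summary fields. A minimal standalone sketch of the same lookup, with BeautifulSoup's get_text() standing in for calibre's tag_to_string() helper and an invented HTML fragment:

# Sketch only: the HTML fragment is made up; in the recipe, soup comes
# from the downloaded article page.
from bs4 import BeautifulSoup

html = '<div class="byline">By A. Reporter | Photographs by B. Shooter</div>'
soup = BeautifulSoup(html, 'html.parser')

summ = soup.find(attrs={'class': 'byline'})
if summ is not None:
    # tag_to_string() flattens a tag to plain text; get_text() is the
    # closest BeautifulSoup equivalent.
    print(summ.get_text(strip=True))  # By A. Reporter | Photographs by B. Shooter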

View File

@@ -160,3 +160,9 @@ class NatGeo(BasicNewsRecipe):
             # for high res images use '?w=2000&h=2000'
             img['src'] = img['src'] + '?w=1000&h=1000'
         return soup
+
+    def populate_article_metadata(self, article, soup, first):
+        summ = soup.find(attrs={'class':'byline'})
+        if summ:
+            article.summary = self.tag_to_string(summ)
+            article.text_summary = self.tag_to_string(summ)

View File

@@ -143,7 +143,7 @@ class NatGeo(BasicNewsRecipe):
         self.log('Downloading ', url)
         self.timefmt = ' [' + edition + ']'
         soup = self.index_to_soup(url)
-        png = re.findall('https://i\.natgeofe\.com\S+?national-geographic-magazine-\S+?\.jpg', soup.decode('utf-8'))
+        png = re.findall('https://i\.natgeofe\.com\S+?national-geographic-\S+?\.jpg', soup.decode('utf-8'))
         self.cover_url = png[0] + '?w=1000&h=1000'
         name = soup.find(attrs={'class':lambda x: x and 'Header__Description' in x.split()})
@@ -179,3 +179,9 @@ class NatGeo(BasicNewsRecipe):
             # for high res images use '?w=2000&h=2000'
             img['src'] = img['src'] + '?w=1200&h=1200'
         return soup
+
+    def populate_article_metadata(self, article, soup, first):
+        summ = soup.find(attrs={'class':'byline'})
+        if summ:
+            article.summary = self.tag_to_string(summ)
+            article.text_summary = self.tag_to_string(summ)
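
Note: the first hunk loosens the cover regex by dropping the mandatory 'magazine-' token, so a cover is still found when the CDN filename omits it; the '?w=1000&h=1000' suffix then asks the image CDN for a resized rendition. A quick sketch of the difference, run against a hypothetical i.natgeofe.com URL:

# Sketch only: the page snippet and filename are invented for illustration.
import re

page = 'src="https://i.natgeofe.com/k/abc123/national-geographic-dec-2023.jpg"'

old_pat = r'https://i\.natgeofe\.com\S+?national-geographic-magazine-\S+?\.jpg'
new_pat = r'https://i\.natgeofe\.com\S+?national-geographic-\S+?\.jpg'

print(re.findall(old_pat, page))  # [] -- the old pattern insists on 'magazine-'
print(re.findall(new_pat, page))  # ['https://i.natgeofe.com/k/abc123/national-geographic-dec-2023.jpg']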

View File

@@ -58,6 +58,10 @@ class TheEconomicTimes(BasicNewsRecipe):
     ]

     def parse_index(self):
+        self.log(
+            '\n***\nif this recipe fails, report it on: '
+            'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
+        )
         soup = self.index_to_soup(
             'https://economictimes.indiatimes.com/print_edition.cms'
         )

View File

@@ -52,6 +52,10 @@ class toiprint(BasicNewsRecipe):
         return cover

     def parse_index(self):
+        self.log(
+            '\n***\nif this recipe fails, report it on: '
+            'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
+        )
         url = index + '/DayIndex/' + date_ + '_' + le + '.json'
         raw = self.index_to_soup(url, raw=True)
         data = json.loads(raw)
@@ -73,7 +77,7 @@ class toiprint(BasicNewsRecipe):
             if 'ArticleName' not in art:
                 continue
             url = art['ArticleName']
-            title = art.get('ArticleTitle', 'unknown').replace('<br>', '')
+            title = art.get('ArticleTitle', 'unknown').replace('<br>', '').replace('<br/>', '')
             if art.get('ColumnTitle', '') == '':
                 desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
             else:
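
Note: ArticleTitle can embed either form of the line-break tag, so the changed line chains a second replace() for the self-closing variant. A hedged alternative (not what the commit does) is a single case-insensitive regex that also catches spaced forms such as '<br />'; the sample titles below are invented:

# Sketch only: clean_title() and the sample strings are hypothetical.
import re

def clean_title(raw):
    # Strip <br>, <br/>, <br /> and case variants in one pass,
    # mirroring the empty-string replacement used by the recipe.
    return re.sub(r'(?i)<br\s*/?>', '', raw)

print(clean_title('Monsoon session<br>begins today'))   # Monsoon sessionbegins today
print(clean_title('Rains lash city<br/>airport shut'))  # Rains lash cityairport shut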