mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Merge branch 'master' of https://github.com/unkn0w7n/calibre
This commit is contained in:
commit
d266fe5447
@ -135,6 +135,11 @@ class NatGeo(BasicNewsRecipe):
|
|||||||
.auth, .time { font-size:small; color:#5c5c5c; }
|
.auth, .time { font-size:small; color:#5c5c5c; }
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
def get_cover_url(self):
|
||||||
|
soup = self.index_to_soup('https://www.nationalgeographic.com/magazine/')
|
||||||
|
png = re.findall('https://i\.natgeofe\.com\S+?national-geographic-\S+?\.jpg', soup.decode('utf-8'))
|
||||||
|
return png[0] + '?w=1000&h=1000'
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
pages = [
|
pages = [
|
||||||
'https://www.nationalgeographic.com/animals',
|
'https://www.nationalgeographic.com/animals',
|
||||||
@ -176,3 +181,9 @@ class NatGeo(BasicNewsRecipe):
|
|||||||
# for high res images use '?w=2000&h=2000'
|
# for high res images use '?w=2000&h=2000'
|
||||||
img['src'] = img['src'] + '?w=1000&h=1000'
|
img['src'] = img['src'] + '?w=1000&h=1000'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
summ = soup.find(attrs={'class':'byline'})
|
||||||
|
if summ:
|
||||||
|
article.summary = self.tag_to_string(summ)
|
||||||
|
article.text_summary = self.tag_to_string(summ)
|
||||||
|
@ -160,3 +160,9 @@ class NatGeo(BasicNewsRecipe):
|
|||||||
# for high res images use '?w=2000&h=2000'
|
# for high res images use '?w=2000&h=2000'
|
||||||
img['src'] = img['src'] + '?w=1000&h=1000'
|
img['src'] = img['src'] + '?w=1000&h=1000'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
summ = soup.find(attrs={'class':'byline'})
|
||||||
|
if summ:
|
||||||
|
article.summary = self.tag_to_string(summ)
|
||||||
|
article.text_summary = self.tag_to_string(summ)
|
||||||
|
@ -143,7 +143,7 @@ class NatGeo(BasicNewsRecipe):
|
|||||||
self.log('Downloading ', url)
|
self.log('Downloading ', url)
|
||||||
self.timefmt = ' [' + edition + ']'
|
self.timefmt = ' [' + edition + ']'
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
png = re.findall('https://i\.natgeofe\.com\S+?national-geographic-magazine-\S+?\.jpg', soup.decode('utf-8'))
|
png = re.findall('https://i\.natgeofe\.com\S+?national-geographic-\S+?\.jpg', soup.decode('utf-8'))
|
||||||
self.cover_url = png[0] + '?w=1000&h=1000'
|
self.cover_url = png[0] + '?w=1000&h=1000'
|
||||||
|
|
||||||
name = soup.find(attrs={'class':lambda x: x and 'Header__Description' in x.split()})
|
name = soup.find(attrs={'class':lambda x: x and 'Header__Description' in x.split()})
|
||||||
@ -179,3 +179,9 @@ class NatGeo(BasicNewsRecipe):
|
|||||||
# for high res images use '?w=2000&h=2000'
|
# for high res images use '?w=2000&h=2000'
|
||||||
img['src'] = img['src'] + '?w=1200&h=1200'
|
img['src'] = img['src'] + '?w=1200&h=1200'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def populate_article_metadata(self, article, soup, first):
|
||||||
|
summ = soup.find(attrs={'class':'byline'})
|
||||||
|
if summ:
|
||||||
|
article.summary = self.tag_to_string(summ)
|
||||||
|
article.text_summary = self.tag_to_string(summ)
|
||||||
|
@ -58,6 +58,10 @@ class TheEconomicTimes(BasicNewsRecipe):
|
|||||||
]
|
]
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
self.log(
|
||||||
|
'\n***\nif this recipe fails, report it on: '
|
||||||
|
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
||||||
|
)
|
||||||
soup = self.index_to_soup(
|
soup = self.index_to_soup(
|
||||||
'https://economictimes.indiatimes.com/print_edition.cms'
|
'https://economictimes.indiatimes.com/print_edition.cms'
|
||||||
)
|
)
|
||||||
|
@ -52,6 +52,10 @@ class toiprint(BasicNewsRecipe):
|
|||||||
return cover
|
return cover
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
|
self.log(
|
||||||
|
'\n***\nif this recipe fails, report it on: '
|
||||||
|
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
||||||
|
)
|
||||||
url = index + '/DayIndex/' + date_ + '_' + le + '.json'
|
url = index + '/DayIndex/' + date_ + '_' + le + '.json'
|
||||||
raw = self.index_to_soup(url, raw=True)
|
raw = self.index_to_soup(url, raw=True)
|
||||||
data = json.loads(raw)
|
data = json.loads(raw)
|
||||||
@ -73,7 +77,7 @@ class toiprint(BasicNewsRecipe):
|
|||||||
if 'ArticleName' not in art:
|
if 'ArticleName' not in art:
|
||||||
continue
|
continue
|
||||||
url = art['ArticleName']
|
url = art['ArticleName']
|
||||||
title = art.get('ArticleTitle', 'unknown').replace('<br>', '')
|
title = art.get('ArticleTitle', 'unknown').replace('<br>', '').replace('<br/>', '')
|
||||||
if art.get('ColumnTitle', '') == '':
|
if art.get('ColumnTitle', '') == '':
|
||||||
desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
|
desc = 'Page No.' + url.split('_')[-3] + ' | ' + art.get('ArticleBody', '')
|
||||||
else:
|
else:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user