mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-30 23:00:21 -04:00
Merge branch 'master' of https://github.com/unkn0w7n/calibre
This commit is contained in:
commit
f44032badb
@ -177,7 +177,7 @@ class HBR(BasicNewsRecipe):
|
|||||||
div.name = 'blockquote'
|
div.name = 'blockquote'
|
||||||
for sidebar in soup.findAll(('article-sidebar', 'article-ideainbrief')):
|
for sidebar in soup.findAll(('article-sidebar', 'article-ideainbrief')):
|
||||||
sidebar.name = 'blockquote'
|
sidebar.name = 'blockquote'
|
||||||
for img in soup.findAll(attrs={'srcset': True}):
|
for img in soup.findAll('img', attrs={'srcset': True}):
|
||||||
split = img['srcset'].split(',')
|
split = img['srcset'].split(',')
|
||||||
for x in split:
|
for x in split:
|
||||||
if '700w' in x:
|
if '700w' in x:
|
||||||
|
@ -33,23 +33,25 @@ class outlook(BasicNewsRecipe):
|
|||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name='svg'),
|
dict(name='svg'),
|
||||||
dict(name='a', attrs={'href':lambda x: x and x.startswith('https://www.whatsapp.com/')}),
|
dict(
|
||||||
classes('ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1')
|
name='a',
|
||||||
|
attrs={'href': lambda x: x and x.startswith('https://www.whatsapp.com/')},
|
||||||
|
),
|
||||||
|
classes(
|
||||||
|
'ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1 story-mag-issue-section'
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
recipe_specific_options = {
|
recipe_specific_options = {
|
||||||
'date': {
|
'date': {
|
||||||
'short': 'The date of the edition to download (DD-Month-YYYY format)',
|
'short': 'The date of the edition to download (DD-Month-YYYY format)',
|
||||||
'long': 'For example, 10-june-2024'
|
'long': 'For example, 10-june-2024',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def get_browser(self):
    """Return the browser object used to fetch pages for this recipe.

    Delegates to ``BasicNewsRecipe.get_browser``, passing a fixed
    ``user_agent`` string and ``verify_ssl_certificates=False``.
    """
    browser_options = {
        'user_agent': 'common_words/based',
        'verify_ssl_certificates': False,
    }
    return BasicNewsRecipe.get_browser(self, **browser_options)
|
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
self.log(
|
self.log(
|
||||||
'\n***\nif this recipe fails, report it on: '
|
'try again and again\n***\nif this recipe fails, report it on: '
|
||||||
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -58,18 +60,18 @@ class outlook(BasicNewsRecipe):
|
|||||||
url = 'https://www.outlookindia.com/magazine/' + d
|
url = 'https://www.outlookindia.com/magazine/' + d
|
||||||
else:
|
else:
|
||||||
soup = self.index_to_soup('https://www.outlookindia.com/magazine')
|
soup = self.index_to_soup('https://www.outlookindia.com/magazine')
|
||||||
a = soup.find('a', attrs={'aria-label':'magazine-cover-image'})
|
a = soup.find('a', attrs={'aria-label': 'magazine-cover-image'})
|
||||||
url = a['href']
|
url = a['href']
|
||||||
|
|
||||||
self.log('Downloading issue:', url)
|
self.log('Downloading issue:', url)
|
||||||
|
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
cov = soup.find(attrs={'aria-label':'magazine-cover-image'})
|
cov = soup.find(attrs={'aria-label': 'magazine-cover-image'})
|
||||||
self.cover_url = cov.img['src'].split('?')[0]
|
self.cover_url = cov.img['src'].split('?')[0]
|
||||||
summ = soup.find(attrs={'data-test-id':'magazine-summary'})
|
summ = soup.find(attrs={'data-test-id': 'magazine-summary'})
|
||||||
if summ:
|
if summ:
|
||||||
self.description = self.tag_to_string(summ)
|
self.description = self.tag_to_string(summ)
|
||||||
tme = soup.find(attrs={'class':'arr__timeago'})
|
tme = soup.find(attrs={'class': 'arr__timeago'})
|
||||||
if tme:
|
if tme:
|
||||||
self.timefmt = ' [' + self.tag_to_string(tme).split('-')[-1].strip() + ']'
|
self.timefmt = ' [' + self.tag_to_string(tme).split('-')[-1].strip() + ']'
|
||||||
|
|
||||||
@ -80,10 +82,12 @@ class outlook(BasicNewsRecipe):
|
|||||||
url = a['href']
|
url = a['href']
|
||||||
title = self.tag_to_string(a)
|
title = self.tag_to_string(a)
|
||||||
desc = ''
|
desc = ''
|
||||||
p = div.find_next_sibling('p', attrs={'class':lambda x: x and 'article-desc' in x.split()})
|
p = div.find_next_sibling(
|
||||||
|
'p', attrs={'class': lambda x: x and 'article-desc' in x.split()}
|
||||||
|
)
|
||||||
if p:
|
if p:
|
||||||
desc = self.tag_to_string(p)
|
desc = self.tag_to_string(p)
|
||||||
auth = div.find_next_sibling('p', attrs={'class':'author'})
|
auth = div.find_next_sibling('p', attrs={'class': 'author'})
|
||||||
if auth:
|
if auth:
|
||||||
desc = self.tag_to_string(auth) + ' | ' + desc
|
desc = self.tag_to_string(auth) + ' | ' + desc
|
||||||
self.log('\t', title)
|
self.log('\t', title)
|
||||||
@ -91,3 +95,12 @@ class outlook(BasicNewsRecipe):
|
|||||||
self.log('\t\t', url)
|
self.log('\t\t', url)
|
||||||
ans.append({'title': title, 'url': url, 'description': desc})
|
ans.append({'title': title, 'url': url, 'description': desc})
|
||||||
return [('Articles', ans)]
|
return [('Articles', ans)]
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Adjust the parsed article markup and return the same soup.

    Three in-place changes are made:

    * the first element matched by ``classes('subcap-story')`` is
      renamed to ``<p>``;
    * every ``<h2>`` and ``<h3>`` is renamed to ``<h4>``;
    * every ``<img>`` carrying a ``data-src`` attribute gets its ``src``
      set to that URL with any query string replaced by ``?w=600``.
    """
    subcap = soup.find(**classes('subcap-story'))
    if subcap:
        subcap.name = 'p'

    for heading in soup.findAll(['h2', 'h3']):
        heading.name = 'h4'

    for image in soup.findAll('img', attrs={'data-src': True}):
        # Keep only the part of the URL before the first '?'.
        base_url = image['data-src'].partition('?')[0]
        image['src'] = base_url + '?w=600'

    return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user