Kovid Goyal 2025-08-24 20:52:52 +05:30
commit f44032badb
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 27 additions and 14 deletions


@@ -177,7 +177,7 @@ class HBR(BasicNewsRecipe):
             div.name = 'blockquote'
         for sidebar in soup.findAll(('article-sidebar', 'article-ideainbrief')):
             sidebar.name = 'blockquote'
-        for img in soup.findAll(attrs={'srcset': True}):
+        for img in soup.findAll('img', attrs={'srcset': True}):
             split = img['srcset'].split(',')
             for x in split:
                 if '700w' in x:
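
Editor's aside, not part of the commit: the one-line change above restricts the srcset handling to real <img> tags, so other elements that carry a srcset (such as <source> inside <picture>) are no longer touched. A minimal standalone sketch of that behaviour; the sample markup and the final assignment to src are assumptions of mine, since the hunk cuts off at the if statement:

# Sketch only: pick the 700w candidate out of a srcset string on <img> tags.
from bs4 import BeautifulSoup

html = '<picture><source srcset="a.jpg 300w"><img srcset="b.jpg 300w, c.jpg 700w"></picture>'
soup = BeautifulSoup(html, 'html.parser')
for img in soup.findAll('img', attrs={'srcset': True}):  # <source> no longer matches
    for x in img['srcset'].split(','):
        if '700w' in x:
            img['src'] = x.split()[0]  # assumed follow-up; not shown in the hunk
print(soup.find('img')['src'])  # -> c.jpg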


@@ -33,23 +33,25 @@ class outlook(BasicNewsRecipe):
     remove_tags = [
         dict(name='svg'),
-        dict(name='a', attrs={'href':lambda x: x and x.startswith('https://www.whatsapp.com/')}),
-        classes('ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1')
+        dict(
+            name='a',
+            attrs={'href': lambda x: x and x.startswith('https://www.whatsapp.com/')},
+        ),
+        classes(
+            'ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1 story-mag-issue-section'
+        ),
     ]
 
     recipe_specific_options = {
         'date': {
             'short': 'The date of the edition to download (DD-Month-YYYY format)',
-            'long': 'For example, 10-june-2024'
+            'long': 'For example, 10-june-2024',
         }
     }
 
-    def get_browser(self):
-        return BasicNewsRecipe.get_browser(self, user_agent='common_words/based', verify_ssl_certificates=False)
-
     def parse_index(self):
         self.log(
-            '\n***\nif this recipe fails, report it on: '
+            'try again and again\n***\nif this recipe fails, report it on: '
             'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
         )
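
Editor's aside, not part of the commit: the dict() entry kept in remove_tags strips WhatsApp share links by matching on the href prefix. A small self-contained illustration; the markup is invented and decompose() only stands in for the removal calibre performs itself:

# Sketch only: how an href-prefix matcher like the one in remove_tags behaves.
from bs4 import BeautifulSoup

html = '<p><a href="https://www.whatsapp.com/send?text=hi">Share</a><a href="/news/story">Story</a></p>'
soup = BeautifulSoup(html, 'html.parser')
matcher = {'href': lambda x: x and x.startswith('https://www.whatsapp.com/')}
for a in soup.findAll('a', attrs=matcher):
    a.decompose()  # mimic calibre dropping the matched tag
print(soup)  # -> <p><a href="/news/story">Story</a></p>
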
@@ -58,18 +60,18 @@ class outlook(BasicNewsRecipe):
             url = 'https://www.outlookindia.com/magazine/' + d
         else:
             soup = self.index_to_soup('https://www.outlookindia.com/magazine')
-            a = soup.find('a', attrs={'aria-label':'magazine-cover-image'})
+            a = soup.find('a', attrs={'aria-label': 'magazine-cover-image'})
             url = a['href']
 
         self.log('Downloading issue:', url)
         soup = self.index_to_soup(url)
-        cov = soup.find(attrs={'aria-label':'magazine-cover-image'})
+        cov = soup.find(attrs={'aria-label': 'magazine-cover-image'})
         self.cover_url = cov.img['src'].split('?')[0]
-        summ = soup.find(attrs={'data-test-id':'magazine-summary'})
+        summ = soup.find(attrs={'data-test-id': 'magazine-summary'})
         if summ:
             self.description = self.tag_to_string(summ)
-        tme = soup.find(attrs={'class':'arr__timeago'})
+        tme = soup.find(attrs={'class': 'arr__timeago'})
         if tme:
             self.timefmt = ' [' + self.tag_to_string(tme).split('-')[-1].strip() + ']'
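
Editor's aside, not part of the commit: the timefmt line keeps only what follows the last '-' in the page's timeago text. A worked example on an assumed input string (the real arr__timeago text may differ):

text = 'Updated 2 days ago - 25 August 2025'   # hypothetical scraped value
timefmt = ' [' + text.split('-')[-1].strip() + ']'
print(timefmt)  # -> ' [25 August 2025]'
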
@@ -80,10 +82,12 @@ class outlook(BasicNewsRecipe):
             url = a['href']
             title = self.tag_to_string(a)
             desc = ''
-            p = div.find_next_sibling('p', attrs={'class':lambda x: x and 'article-desc' in x.split()})
+            p = div.find_next_sibling(
+                'p', attrs={'class': lambda x: x and 'article-desc' in x.split()}
+            )
             if p:
                 desc = self.tag_to_string(p)
-            auth = div.find_next_sibling('p', attrs={'class':'author'})
+            auth = div.find_next_sibling('p', attrs={'class': 'author'})
             if auth:
                 desc = self.tag_to_string(auth) + ' | ' + desc
             self.log('\t', title)
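
Editor's aside, not part of the commit: the reflowed find_next_sibling() calls look for the standfirst and author paragraphs that follow the headline's <div>. A self-contained sketch with invented markup showing how the description string is assembled:

# Sketch only: locating the description and author paragraphs next to a headline div.
from bs4 import BeautifulSoup

html = ('<div><a href="/story">Headline</a></div>'
        '<p class="article-desc extra">Standfirst text</p>'
        '<p class="author">A. Writer</p>')
soup = BeautifulSoup(html, 'html.parser')
div = soup.find('div')
p = div.find_next_sibling('p', attrs={'class': lambda x: x and 'article-desc' in x.split()})
auth = div.find_next_sibling('p', attrs={'class': 'author'})
desc = auth.get_text() + ' | ' + p.get_text()
print(desc)  # -> A. Writer | Standfirst text
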
@@ -91,3 +95,12 @@ class outlook(BasicNewsRecipe):
             self.log('\t\t', url)
             ans.append({'title': title, 'url': url, 'description': desc})
         return [('Articles', ans)]
+
+    def preprocess_html(self, soup):
+        if sub := soup.find(**classes('subcap-story')):
+            sub.name = 'p'
+        for h2 in soup.findAll(['h2', 'h3']):
+            h2.name = 'h4'
+        for img in soup.findAll('img', attrs={'data-src': True}):
+            img['src'] = img['data-src'].split('?')[0] + '?w=600'
+        return soup
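
Editor's aside on the added preprocess_html(), not part of the commit: lazily loaded images keep their real URL in data-src, so it is copied into src with the query string stripped and a width parameter re-added (presumably a resize hint for the site's image CDN; 600px is the recipe author's choice). The if sub := ... line relies on the walrus operator, so it needs Python 3.8+. A sketch of the image rewrite with invented markup:

# Sketch only: rewriting data-src into src the way the new preprocess_html() does.
from bs4 import BeautifulSoup

html = '<img data-src="https://media.example.com/pic.jpg?w=1200&amp;auto=format" src="placeholder.gif"/>'
soup = BeautifulSoup(html, 'html.parser')
for img in soup.findAll('img', attrs={'data-src': True}):
    img['src'] = img['data-src'].split('?')[0] + '?w=600'
print(soup.img['src'])  # -> https://media.example.com/pic.jpg?w=600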