mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-30 23:00:21 -04:00
Update Outlook Magazine
fix images and remove some tags
This commit is contained in:
parent
7676b281ae
commit
33fc94b96d
@ -177,7 +177,7 @@ class HBR(BasicNewsRecipe):
|
||||
div.name = 'blockquote'
|
||||
for sidebar in soup.findAll(('article-sidebar', 'article-ideainbrief')):
|
||||
sidebar.name = 'blockquote'
|
||||
for img in soup.findAll(attrs={'srcset': True}):
|
||||
for img in soup.findAll('img', attrs={'srcset': True}):
|
||||
split = img['srcset'].split(',')
|
||||
for x in split:
|
||||
if '700w' in x:
|
||||
|
@ -33,23 +33,25 @@ class outlook(BasicNewsRecipe):
|
||||
|
||||
remove_tags = [
|
||||
dict(name='svg'),
|
||||
dict(name='a', attrs={'href':lambda x: x and x.startswith('https://www.whatsapp.com/')}),
|
||||
classes('ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1')
|
||||
dict(
|
||||
name='a',
|
||||
attrs={'href': lambda x: x and x.startswith('https://www.whatsapp.com/')},
|
||||
),
|
||||
classes(
|
||||
'ads-box info-img-absolute mobile-info-id story-dec-time-mobile sb-also-read ads-box1 story-mag-issue-section'
|
||||
),
|
||||
]
|
||||
|
||||
recipe_specific_options = {
|
||||
'date': {
|
||||
'short': 'The date of the edition to download (DD-Month-YYYY format)',
|
||||
'long': 'For example, 10-june-2024'
|
||||
'long': 'For example, 10-june-2024',
|
||||
}
|
||||
}
|
||||
|
||||
def get_browser(self):
    """Return the browser built by ``BasicNewsRecipe.get_browser``.

    The base implementation is called with a fixed user-agent string and
    with SSL certificate verification switched off.
    """
    browser_options = {
        'user_agent': 'common_words/based',
        'verify_ssl_certificates': False,
    }
    return BasicNewsRecipe.get_browser(self, **browser_options)
|
||||
|
||||
def parse_index(self):
|
||||
self.log(
|
||||
'\n***\nif this recipe fails, report it on: '
|
||||
'try again and again\n***\nif this recipe fails, report it on: '
|
||||
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
||||
)
|
||||
|
||||
@ -58,18 +60,18 @@ class outlook(BasicNewsRecipe):
|
||||
url = 'https://www.outlookindia.com/magazine/' + d
|
||||
else:
|
||||
soup = self.index_to_soup('https://www.outlookindia.com/magazine')
|
||||
a = soup.find('a', attrs={'aria-label':'magazine-cover-image'})
|
||||
a = soup.find('a', attrs={'aria-label': 'magazine-cover-image'})
|
||||
url = a['href']
|
||||
|
||||
self.log('Downloading issue:', url)
|
||||
|
||||
soup = self.index_to_soup(url)
|
||||
cov = soup.find(attrs={'aria-label':'magazine-cover-image'})
|
||||
cov = soup.find(attrs={'aria-label': 'magazine-cover-image'})
|
||||
self.cover_url = cov.img['src'].split('?')[0]
|
||||
summ = soup.find(attrs={'data-test-id':'magazine-summary'})
|
||||
summ = soup.find(attrs={'data-test-id': 'magazine-summary'})
|
||||
if summ:
|
||||
self.description = self.tag_to_string(summ)
|
||||
tme = soup.find(attrs={'class':'arr__timeago'})
|
||||
tme = soup.find(attrs={'class': 'arr__timeago'})
|
||||
if tme:
|
||||
self.timefmt = ' [' + self.tag_to_string(tme).split('-')[-1].strip() + ']'
|
||||
|
||||
@ -80,10 +82,12 @@ class outlook(BasicNewsRecipe):
|
||||
url = a['href']
|
||||
title = self.tag_to_string(a)
|
||||
desc = ''
|
||||
p = div.find_next_sibling('p', attrs={'class':lambda x: x and 'article-desc' in x.split()})
|
||||
p = div.find_next_sibling(
|
||||
'p', attrs={'class': lambda x: x and 'article-desc' in x.split()}
|
||||
)
|
||||
if p:
|
||||
desc = self.tag_to_string(p)
|
||||
auth = div.find_next_sibling('p', attrs={'class':'author'})
|
||||
auth = div.find_next_sibling('p', attrs={'class': 'author'})
|
||||
if auth:
|
||||
desc = self.tag_to_string(auth) + ' | ' + desc
|
||||
self.log('\t', title)
|
||||
@ -91,3 +95,12 @@ class outlook(BasicNewsRecipe):
|
||||
self.log('\t\t', url)
|
||||
ans.append({'title': title, 'url': url, 'description': desc})
|
||||
return [('Articles', ans)]
|
||||
|
||||
def preprocess_html(self, soup):
    """Rewrite parts of the article markup in place and return the soup.

    * The first element found via ``classes('subcap-story')``, if any, is
      renamed to ``<p>``.
    * Every ``<h2>`` and ``<h3>`` is renamed to ``<h4>``.
    * Every ``<img>`` that has a ``data-src`` attribute gets its ``src``
      set to that value with any query string replaced by ``?w=600``
      (presumably a width request - confirm against the site).
    """
    subcaption = soup.find(**classes('subcap-story'))
    if subcaption:
        subcaption.name = 'p'

    for heading in soup.findAll(['h2', 'h3']):
        heading.name = 'h4'

    for img_tag in soup.findAll('img', attrs={'data-src': True}):
        # Keep only the part before the first '?', then append the new query.
        base_url = img_tag['data-src'].partition('?')[0]
        img_tag['src'] = base_url + '?w=600'

    return soup
|
||||
|
Loading…
x
Reference in New Issue
Block a user