mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Business Today
This commit is contained in:
parent
6f82b8e36c
commit
fe60dd936f
@ -7,6 +7,8 @@ class BT(BasicNewsRecipe):
|
|||||||
__author__ = 'unkn0wn'
|
__author__ = 'unkn0wn'
|
||||||
no_stylesheets = True
|
no_stylesheets = True
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
remove_javascript = True
|
||||||
|
encoding = 'utf-8'
|
||||||
remove_attributes = ['style', 'height', 'width']
|
remove_attributes = ['style', 'height', 'width']
|
||||||
ignore_duplicate_articles = {'url'}
|
ignore_duplicate_articles = {'url'}
|
||||||
description = (
|
description = (
|
||||||
@ -16,24 +18,36 @@ class BT(BasicNewsRecipe):
|
|||||||
masthead_url = 'https://akm-img-a-in.tosshub.com/businesstoday/resource/img/logo.png'
|
masthead_url = 'https://akm-img-a-in.tosshub.com/businesstoday/resource/img/logo.png'
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
classes('story-heading sab-head-tranlate-sec user-detial-left main-img field--name-body'),
|
classes('story-heading sab-head-tranlate-sec brand-detial-main main-img field--name-body'),
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
|
dict(name=['link', 'meta', 'svg', 'button', 'script']),
|
||||||
dict(name='a', attrs={'title': 'videos'}),
|
dict(name='a', attrs={'title': 'videos'}),
|
||||||
classes('tranding-topics-main newsltter-iframe hedlineteg')
|
classes(
|
||||||
|
'tranding-topics-main newsltter-iframe hedlineteg stoybday-ad story-recommended-chunk '
|
||||||
|
'banner_content'
|
||||||
|
)
|
||||||
]
|
]
|
||||||
extra_css = 'a[href^="https://www.businesstoday.in/videos"]{display: none;}'
|
extra_css = '''
|
||||||
|
img {display:block; margin:0 auto;}
|
||||||
|
em { color:#202020; }
|
||||||
|
.main-img { font-size:small; text-align:center; }
|
||||||
|
.summary {font-style:italic; color:#202020; }
|
||||||
|
'''
|
||||||
|
|
||||||
def parse_index(self):
|
def parse_index(self):
|
||||||
soup = self.index_to_soup('https://www.businesstoday.in/magazine')
|
self.log(
|
||||||
issue = soup.find(attrs={'class': 'swiper-wrapper'})
|
'\n***\nif this recipe fails, report it on: '
|
||||||
a = issue.findAll('a', href=lambda x: x and '/magazine/issue/' in x)[1]
|
'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
|
||||||
|
)
|
||||||
|
soup = self.index_to_soup('https://www.businesstoday.in')
|
||||||
|
a = soup.findAll('a', attrs={'class':'mag_sld_img'})[1]
|
||||||
|
self.cover_url = a.img['data-src'].split('?')[0]
|
||||||
url = a['href']
|
url = a['href']
|
||||||
self.log('issue =', url)
|
self.log('issue =', url)
|
||||||
|
self.timefmt = ' [' + url.split('/')[-1] + ']'
|
||||||
soup = self.index_to_soup(url)
|
soup = self.index_to_soup(url)
|
||||||
tag = soup.find(attrs={'class': 'issue-image'})
|
|
||||||
if tag:
|
|
||||||
self.cover_url = tag.find('img')['src']
|
|
||||||
section = None
|
section = None
|
||||||
sections = {}
|
sections = {}
|
||||||
|
|
||||||
@ -78,6 +92,20 @@ class BT(BasicNewsRecipe):
|
|||||||
return feeds
|
return feeds
|
||||||
|
|
||||||
def preprocess_html(self, soup):
    """Clean up a downloaded article page and return the same soup.

    Steps, in order:
    1. Drop the first ``<ul>`` found inside the element matched by
       ``classes('brand-detial-main')``, if both exist.
    2. Remove every ``<a>`` whose href contains
       ``businesstoday.in/videos``.
    3. Turn the first ``<h2>`` inside the ``summary`` element into a
       ``<p>``.
    4. Copy each image's ``data-src`` (query string stripped) into ``src``.
    """
    def points_to_video(href):
        # href is None for anchors without the attribute.
        return href and 'businesstoday.in/videos' in href

    byline_box = soup.find(**classes('brand-detial-main'))
    inner_list = byline_box.find('ul') if byline_box else None
    if inner_list:
        inner_list.decompose()

    for anchor in soup.findAll('a', attrs={'href': points_to_video}):
        anchor.decompose()

    summary_box = soup.find(**classes('summary'))
    heading = summary_box.find('h2') if summary_box else None
    if heading:
        heading.name = 'p'

    for picture in soup.findAll('img', attrs={'data-src': True}):
        lazy_url = picture['data-src']
        picture['src'] = lazy_url.split('?')[0]

    return soup
|
||||||
|
@ -23,6 +23,20 @@ date_ = dt.strftime('%d_%m_%Y')
|
|||||||
index = 'https://asset.harnscloud.com/PublicationData/TOI/' + le + '/' + date0
|
index = 'https://asset.harnscloud.com/PublicationData/TOI/' + le + '/' + date0
|
||||||
img_index = 'https://cmsimages.timesgroup.com/image-resizer?epaper_s3_path=PublicationData/TOI/' + le + '/' + date0
|
img_index = 'https://cmsimages.timesgroup.com/image-resizer?epaper_s3_path=PublicationData/TOI/' + le + '/' + date0
|
||||||
|
|
||||||
|
def handle_images(x, soup):
    """Move the first image block, its caption and the lead next to *x*.

    The ``<div>`` enclosing the first ``<img>`` in *soup* is re-inserted
    directly after *x*. If the node that originally followed that div
    carries the ``cap`` class, it is moved to sit right after the div.
    Finally the first ``<div class="lead">`` (if any) is inserted directly
    after *x*; because it is inserted last, it ends up between *x* and the
    image block.

    Nothing is moved for the image part when there is no ``<img>`` or the
    image has no enclosing ``<div>``.

    :param x: node after which the image block and lead are placed.
    :param soup: parsed document that is searched and modified in place.
    """
    img = soup.find('img')
    img_div = img.findParent('div') if img else None
    if img_div is not None:
        # Read the sibling before moving img_div, otherwise it is lost.
        cap = img_div.next_sibling
        # next_sibling may be a text node, which has no has_attr();
        # only real tags can carry class="cap".
        has_caption = (
            cap is not None
            and hasattr(cap, 'has_attr')
            and cap.has_attr('class')
            and 'cap' in cap['class']
        )
        x.insert_after(img_div)
        if has_caption:
            img_div.insert_after(cap)

    lead = soup.find('div', attrs={'class': 'lead'})
    if lead:
        x.insert_after(lead)
|
||||||
|
|
||||||
class toiprint(BasicNewsRecipe):
|
class toiprint(BasicNewsRecipe):
|
||||||
title = 'TOI Print Edition'
|
title = 'TOI Print Edition'
|
||||||
language = 'en_IN'
|
language = 'en_IN'
|
||||||
@ -43,6 +57,7 @@ class toiprint(BasicNewsRecipe):
|
|||||||
.cap { text-align:center; font-size:small; }
|
.cap { text-align:center; font-size:small; }
|
||||||
img { display:block; margin:0 auto; }
|
img { display:block; margin:0 auto; }
|
||||||
.info { font-size:small; color:#404040; }
|
.info { font-size:small; color:#404040; }
|
||||||
|
.lead { color:#404040; }
|
||||||
'''
|
'''
|
||||||
|
|
||||||
def get_cover_url(self):
|
def get_cover_url(self):
|
||||||
@ -111,7 +126,7 @@ class toiprint(BasicNewsRecipe):
|
|||||||
elif x['TagName'] == 'Author':
|
elif x['TagName'] == 'Author':
|
||||||
body += '<p class="auth">' + x['ZoneText'].replace('<br>', '') + '</p>'
|
body += '<p class="auth">' + x['ZoneText'].replace('<br>', '') + '</p>'
|
||||||
elif x['TagName'] in 'ArticleBody':
|
elif x['TagName'] in 'ArticleBody':
|
||||||
body += x['ZoneText']
|
body += '<span>' + x['ZoneText'] + '</span>'
|
||||||
elif x['TagName'] in 'Information':
|
elif x['TagName'] in 'Information':
|
||||||
body += '<p class="info">' + x['ZoneText'] + '</p>'
|
body += '<p class="info">' + x['ZoneText'] + '</p>'
|
||||||
elif x['TagName'] in {'LinkTo', 'LinkFrom'}:
|
elif x['TagName'] in {'LinkTo', 'LinkFrom'}:
|
||||||
@ -122,12 +137,28 @@ class toiprint(BasicNewsRecipe):
|
|||||||
+ x['ZoneID'] + '.jpg&bucket=andre-toi-out&q=50')
|
+ x['ZoneID'] + '.jpg&bucket=andre-toi-out&q=50')
|
||||||
elif x['TagName'] == 'ImageCaption':
|
elif x['TagName'] == 'ImageCaption':
|
||||||
body += '<div class="cap">' + x['ZoneText'] + '</div><p>'
|
body += '<div class="cap">' + x['ZoneText'] + '</div><p>'
|
||||||
|
elif x['TagName'] == 'Lead':
|
||||||
|
body += '<div class="lead"><p><i>' + x['ZoneText'] + '</i></p></div><p>'
|
||||||
elif 'ZoneText' in x:
|
elif 'ZoneText' in x:
|
||||||
body += '<p><i>' + x['ZoneText'] + '</i></p>'
|
body += '<p><i>' + x['ZoneText'] + '</i></p>'
|
||||||
return '<html><body><div>' \
|
return '<html><body><div>' \
|
||||||
+ body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('<br>', '<p>').replace('\n', '<br>') \
|
+ body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('<br>', '<p>').replace('\n', '<br>') \
|
||||||
+ '</div></body></html>'
|
+ '</div></body></html>'
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Reposition the article image and lead below the headline block.

    Starting from the first ``<h1>``, up to two immediately following
    siblings carrying the ``sub`` class are skipped; ``handle_images`` is
    then called with the last node of that run (the ``<h1>`` itself when
    no such sibling follows). Documents without an ``<h1>`` are returned
    untouched.

    :param soup: parsed article document, modified in place.
    :return: the same *soup* object.
    """
    def is_sub(node):
        # next_sibling may be None or a text node; text nodes have no
        # has_attr(), so only real tags can qualify.
        return (
            node is not None
            and hasattr(node, 'has_attr')
            and node.has_attr('class')
            and 'sub' in node['class']
        )

    h1 = soup.find('h1')
    if h1:
        anchor = h1
        first = h1.next_sibling
        if is_sub(first):
            anchor = first
            second = first.next_sibling
            if is_sub(second):
                anchor = second
        handle_images(anchor, soup)
    return soup
|
||||||
|
|
||||||
def print_version(self, url):
    """Build the article JSON address for *url* under the ``index`` base.

    The result is ``<index>/ArticleZoneJson/<field>/<url>.json`` where
    ``<field>`` is the third-from-last underscore-separated part of *url*.
    Raises IndexError when *url* has fewer than three such parts.
    """
    folder = url.split('_')[-3]
    return '/'.join((index + '/ArticleZoneJson', folder, url + '.json'))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user