From fe60dd936f6afbde8e38f37fa74e7bf895c2e78f Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sat, 27 Jan 2024 12:55:10 +0530
Subject: [PATCH] Update Business Today

---
 recipes/business_today.recipe | 46 ++++++++++++++++++++++++++++-------
 recipes/toiprint.recipe       | 33 ++++++++++++++++++++++++-
 2 files changed, 69 insertions(+), 10 deletions(-)

diff --git a/recipes/business_today.recipe b/recipes/business_today.recipe
index ca6077648b..44c5705ba3 100644
--- a/recipes/business_today.recipe
+++ b/recipes/business_today.recipe
@@ -7,6 +7,8 @@ class BT(BasicNewsRecipe):
     __author__ = 'unkn0wn'
     no_stylesheets = True
     use_embedded_content = False
+    remove_javascript = True
+    encoding = 'utf-8'
     remove_attributes = ['style', 'height', 'width']
     ignore_duplicate_articles = {'url'}
     description = (
@@ -16,24 +18,36 @@ class BT(BasicNewsRecipe):
     masthead_url = 'https://akm-img-a-in.tosshub.com/businesstoday/resource/img/logo.png'
 
     keep_only_tags = [
-        classes('story-heading sab-head-tranlate-sec user-detial-left main-img field--name-body'),
+        classes('story-heading sab-head-tranlate-sec brand-detial-main main-img field--name-body'),
     ]
+
     remove_tags = [
+        dict(name=['link', 'meta', 'svg', 'button', 'script']),
         dict(name='a', attrs={'title': 'videos'}),
-        classes('tranding-topics-main newsltter-iframe hedlineteg')
+        classes(
+            'tranding-topics-main newsltter-iframe hedlineteg stoybday-ad story-recommended-chunk '
+            'banner_content'
+        )
     ]
-    extra_css = 'a[href^="https://www.businesstoday.in/videos"]{display: none;}'
+    extra_css = '''
+        img {display:block; margin:0 auto;}
+        em { color:#202020; }
+        .main-img { font-size:small; text-align:center; }
+        .summary {font-style:italic; color:#202020; }
+    '''
 
     def parse_index(self):
-        soup = self.index_to_soup('https://www.businesstoday.in/magazine')
-        issue = soup.find(attrs={'class': 'swiper-wrapper'})
-        a = issue.findAll('a', href=lambda x: x and '/magazine/issue/' in x)[1]
+        self.log(
+            '\n***\nif this recipe fails, report it on: '
+            'https://www.mobileread.com/forums/forumdisplay.php?f=228\n***\n'
+        )
+        soup = self.index_to_soup('https://www.businesstoday.in')
+        a = soup.findAll('a', attrs={'class':'mag_sld_img'})[1]
+        self.cover_url = a.img['data-src'].split('?')[0]
         url = a['href']
         self.log('issue =', url)
+        self.timefmt = ' [' + url.split('/')[-1] + ']'
         soup = self.index_to_soup(url)
-        tag = soup.find(attrs={'class': 'issue-image'})
-        if tag:
-            self.cover_url = tag.find('img')['src']
         section = None
         sections = {}
 
@@ -78,6 +92,20 @@ class BT(BasicNewsRecipe):
         return feeds
 
     def preprocess_html(self, soup):
+        # Strip the first <ul> in the 'brand-detial-main' block and links to the videos section.
+        brand = soup.find(**classes('brand-detial-main'))
+        brand_list = brand.find('ul') if brand else None
+        if brand_list:
+            brand_list.decompose()
+        def is_video(href):
+            return href and 'businesstoday.in/videos' in href
+        for link in soup.findAll('a', attrs={'href': is_video}):
+            link.decompose()
+        summary = soup.find(**classes('summary'))
+        heading = summary.find('h2') if summary else None
+        if heading:
+            # keep the summary text but turn its <h2> into a plain paragraph
+            heading.name = 'p'
         for img in soup.findAll('img', attrs={'data-src': True}):
             img['src'] = img['data-src'].split('?')[0]
         return soup
diff --git a/recipes/toiprint.recipe b/recipes/toiprint.recipe
index d901a85c64..3f9f503501 100644
--- a/recipes/toiprint.recipe
+++ b/recipes/toiprint.recipe
@@ -23,6 +23,20 @@ date_ = dt.strftime('%d_%m_%Y')
 index = 'https://asset.harnscloud.com/PublicationData/TOI/' + le + '/' + date0
 img_index = 'https://cmsimages.timesgroup.com/image-resizer?epaper_s3_path=PublicationData/TOI/' + le + '/' + date0
 
+def handle_images(x, soup):
+    """Move the first image's div (with a trailing 'cap' sibling) and the 'lead' div to right after x."""
+    img = soup.find('img')
+    img_div = img.findParent('div') if img else None
+    if img_div:  # an image without an enclosing div is left where it is
+        cap = img_div.next_sibling  # read before img_div is relocated
+        x.insert_after(img_div)
+        # a text-node sibling has no has_attr()/get(), so probe before testing the class
+        if cap and hasattr(cap, 'has_attr') and 'cap' in (cap.get('class') or []):
+            img_div.insert_after(cap)
+    lead = soup.find('div', attrs={'class':'lead'})
+    if lead:
+        x.insert_after(lead)
+
 class toiprint(BasicNewsRecipe):
     title = 'TOI Print Edition'
     language = 'en_IN'
@@ -43,6 +57,7 @@ class toiprint(BasicNewsRecipe):
         .cap { text-align:center; font-size:small; }
         img { display:block; margin:0 auto; }
         .info { font-size:small; color:#404040; }
+        .lead { color:#404040; }
     '''
 
     def get_cover_url(self):
@@ -111,7 +126,7 @@ class toiprint(BasicNewsRecipe):
             elif x['TagName'] == 'Author':
                 body += '<p class="auth">' + x['ZoneText'].replace('<br>', '') + '</p>'
             elif x['TagName'] in 'ArticleBody':
-                body += x['ZoneText']
+                body += '<span>' + x['ZoneText'] + '</span>'
             elif x['TagName'] in 'Information':
                 body += '<p class="info">' + x['ZoneText'] + '</p>'
             elif x['TagName'] in {'LinkTo', 'LinkFrom'}:
@@ -122,12 +137,28 @@ class toiprint(BasicNewsRecipe):
                      + x['ZoneID'] + '.jpg&bucket=andre-toi-out&q=50')
             elif x['TagName'] == 'ImageCaption':
                 body += '<div class="cap">' + x['ZoneText'] + '</div><p>'
+            elif x['TagName'] == 'Lead':
+                body += '<div class="lead"><p><i>' + x['ZoneText'] + '</i></p></div><p>'
             elif 'ZoneText' in x:
                 body += '<p><i>' + x['ZoneText'] + '</i></p>'
         return '<html><body><div>' \
                     + body.replace('<br>', '<p>').replace('<br/>', '<p>').replace('&lt;br&gt;', '<p>').replace('\n', '<br>') \
                         + '</div></body></html>'
 
+    def preprocess_html(self, soup):
+        # Re-anchor the image/lead blocks below the <h1> and up to two 'sub' siblings that follow it.
+        def is_sub(node):
+            # text nodes lack has_attr()/get(), so probe before testing the class
+            return bool(node) and hasattr(node, 'has_attr') and 'sub' in (node.get('class') or [])
+        anchor = soup.find('h1')
+        if anchor:
+            for _ in range(2):  # step over at most two consecutive 'sub' elements
+                if not is_sub(anchor.next_sibling):
+                    break
+                anchor = anchor.next_sibling
+            handle_images(anchor, soup)
+        return soup
+
     def print_version(self, url):
         return index + '/ArticleZoneJson/' + url.split('_')[-3] + '/' + url + '.json'