From 6d3865ca107384cc0530b62ef90b2613a28fb2f0 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Mon, 11 Mar 2024 09:27:33 +0530
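Subject: [PATCH] update livemint, natgeo and philosophy now recipes

livemint: add a width rule for .psTopLogoItem img and .ecologoStory,
strip more empty <p> variants and drop <p> openers that directly precede
certain tags in preprocess_raw_html, and remove a redundant else branch.
natgeo, natgeohis, natgeomag: re-indent the caption format arguments and
change the default of the attrs lookup in parse_body.
phillosophy_now: style .articleImageCaption instead of .articleImage.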

---
 recipes/livemint.recipe        | 10 +++++++---
 recipes/natgeo.recipe          | 10 +++++-----
 recipes/natgeohis.recipe       | 10 +++++-----
 recipes/natgeomag.recipe       | 10 +++++-----
 recipes/phillosophy_now.recipe |  2 +-
 5 files changed, 23 insertions(+), 19 deletions(-)
diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe
index 9010d0f2b8..b59bfb6b12 100644
--- a/recipes/livemint.recipe
+++ b/recipes/livemint.recipe
@@ -84,6 +84,7 @@ class LiveMint(BasicNewsRecipe):
 
         extra_css = '''
             img {margin:0 auto;}
+            .psTopLogoItem img, .ecologoStory { width:100; }
             #img-cap {font-size:small; text-align:center;}
             .summary, .highlights, .synopsis {
                 font-weight:normal !important; font-style:italic; color:#202020;
@@ -129,7 +130,11 @@ class LiveMint(BasicNewsRecipe):
 
         def preprocess_raw_html(self, raw, *a):
             # remove empty p tags
-            raw = re.sub(r'(<p>\s*&nbsp;\s*<\/p>)|(<p>\s*<\/p>)', '', raw)
+            raw = re.sub(
+                r'(<p>\s*)(<[^(\/|a|i|b|em|strong)])', '\g<2>', re.sub(
+                    r'(<p>\s*&nbsp;\s*<\/p>)|(<p>\s*<\/p>)|(<p\s*\S+>&nbsp;\s*<\/p>)', '', raw
+                )
+            )
             if '<script>var wsjFlag=true;</script>' in raw:
                 m = re.search(r'type="application/ld\+json">[^<]+?"@type": "NewsArticle"', raw)
                 raw1 = raw[m.start():]
@@ -141,8 +146,7 @@ class LiveMint(BasicNewsRecipe):
                 body = '<div class="FirstEle"> <p>' +  body  + '</p> </div>'
                 raw2 = re.sub(r'<div class="FirstEle">([^}]*)</div>', body, raw)
                 return raw2
-            else:
-                return raw
+            return raw
 
         def preprocess_html(self, soup):
             for strong in soup.findAll('strong'):
diff --git a/recipes/natgeo.recipe b/recipes/natgeo.recipe
index 3580693ad9..4f902af9cc 100644
--- a/recipes/natgeo.recipe
+++ b/recipes/natgeo.recipe
@@ -53,8 +53,8 @@ def parse_inline(inl):
             yield '<div class="img"><img src="{}"></div>'.format(props['image']['src'])
         if 'caption' in props:
             yield '<div class="cap">{}<span class="cred">{}</span></div>'.format(
-                    props['caption']['text'], ' ' + props['caption']['credit']
-                )
+                props['caption']['text'], ' ' + props['caption']['credit']
+            )
         yield '</p>'
     if inl.get('content', {}).get('name', '') == 'ImageGroup':
         if 'images' in inl['content']['props']:
@@ -64,8 +64,8 @@ def parse_inline(inl):
                     yield '<div class="img"><img src="{}"></div>'.format(imgs['src'])
                 if 'caption' in imgs:
                     yield '<div class="cap">{}<span class="cred">{}</span></div>'.format(
-                    imgs['caption']['text'], ' ' + imgs['caption']['credit']
-                )
+                        imgs['caption']['text'], ' ' + imgs['caption']['credit']
+                    )
                 yield '</p>'
 
 
@@ -83,7 +83,7 @@ def parse_body(x):
             tag = x['type']
             if tag == 'inline':
                 yield ''.join(parse_inline(x))
-            elif 'attrs' in x and 'href' in x.get('attrs', {}):
+            elif 'attrs' in x and 'href' in x.get('attrs', ''):
                 yield '<' + tag + ' href = "{}">'.format(x['attrs']['href'])
                 for yld in parse_cont(x):
                     yield yld
diff --git a/recipes/natgeohis.recipe b/recipes/natgeohis.recipe
index 89a2b59ea0..d52a63f7c5 100644
--- a/recipes/natgeohis.recipe
+++ b/recipes/natgeohis.recipe
@@ -52,8 +52,8 @@ def parse_inline(inl):
             yield '<div class="img"><img src="{}"></div>'.format(props['image']['src'])
         if 'caption' in props:
             yield '<div class="cap">{}<span class="cred">{}</span></div>'.format(
-                    props['caption']['text'], ' ' + props['caption']['credit']
-                )
+                props['caption']['text'], ' ' + props['caption']['credit']
+            )
         yield '</p>'
     if inl.get('content', {}).get('name', '') == 'ImageGroup':
         if 'images' in inl['content']['props']:
@@ -63,8 +63,8 @@ def parse_inline(inl):
                     yield '<div class="img"><img src="{}"></div>'.format(imgs['src'])
                 if 'caption' in imgs:
                     yield '<div class="cap">{}<span class="cred">{}</span></div>'.format(
-                    imgs['caption']['text'], ' ' + imgs['caption']['credit']
-                )
+                        imgs['caption']['text'], ' ' + imgs['caption']['credit']
+                    )
                 yield '</p>'
 
 
@@ -82,7 +82,7 @@ def parse_body(x):
             tag = x['type']
             if tag == 'inline':
                 yield ''.join(parse_inline(x))
-            elif 'attrs' in x and 'href' in x.get('attrs', {}):
+            elif 'attrs' in x and 'href' in x.get('attrs', ''):
                 yield '<' + tag + ' href = "{}">'.format(x['attrs']['href'])
                 for yld in parse_cont(x):
                     yield yld
diff --git a/recipes/natgeomag.recipe b/recipes/natgeomag.recipe
index 537263eb65..d4c179c88c 100644
--- a/recipes/natgeomag.recipe
+++ b/recipes/natgeomag.recipe
@@ -57,8 +57,8 @@ def parse_inline(inl):
             yield '<div class="img"><img src="{}"></div>'.format(props['image']['src'])
         if 'caption' in props:
             yield '<div class="cap">{}<span class="cred">{}</span></div>'.format(
-                    props['caption']['text'], ' ' + props['caption']['credit']
-                )
+                props['caption']['text'], ' ' + props['caption']['credit']
+            )
         yield '</p>'
     if inl.get('content', {}).get('name', '') == 'ImageGroup':
         if 'images' in inl['content']['props']:
@@ -68,8 +68,8 @@ def parse_inline(inl):
                     yield '<div class="img"><img src="{}"></div>'.format(imgs['src'])
                 if 'caption' in imgs:
                     yield '<div class="cap">{}<span class="cred">{}</span></div>'.format(
-                    imgs['caption']['text'], ' ' + imgs['caption']['credit']
-                )
+                        imgs['caption']['text'], ' ' + imgs['caption']['credit']
+                    )
                 yield '</p>'
 
 
@@ -87,7 +87,7 @@ def parse_body(x):
             tag = x['type']
             if tag == 'inline':
                 yield ''.join(parse_inline(x))
-            elif 'attrs' in x and 'href' in x.get('attrs', {}):
+            elif 'attrs' in x and 'href' in x.get('attrs', ''):
                 yield '<' + tag + ' href = "{}">'.format(x['attrs']['href'])
                 for yld in parse_cont(x):
                     yield yld
diff --git a/recipes/phillosophy_now.recipe b/recipes/phillosophy_now.recipe
index 1fb4810ddc..350a57b4f0 100644
--- a/recipes/phillosophy_now.recipe
+++ b/recipes/phillosophy_now.recipe
@@ -27,7 +27,7 @@ class PhilosophyNow(BasicNewsRecipe):
     remove_tags = [dict(name='div', attrs={'id':'welcome_box'})]
     extra_css = '''
         img {display:block; margin:0 auto;}
-        .articleImage { font-size:small; text-align:center; }
+        .articleImageCaption { font-size:small; text-align:center; }
         em, blockquote { color:#202020; }
     '''