From 3f013c3856b10bd6fbffcd0f0084c7d59773d47e Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 31 Aug 2022 20:43:19 +0530
Subject: [PATCH] update Live Law and Live Mint

---
 recipes/live_law.recipe |  3 ++-
 recipes/livemint.recipe | 59 ++++++++++++++++++++++-------------------
 2 files changed, 33 insertions(+), 29 deletions(-)

diff --git a/recipes/live_law.recipe b/recipes/live_law.recipe
index 1e2dc56c8e..58bf6b4943 100644
--- a/recipes/live_law.recipe
+++ b/recipes/live_law.recipe
@@ -35,6 +35,7 @@ class livelaw(BasicNewsRecipe):
     ]
 
     remove_tags = [
+        classes('in-image-ad-wrap'),
         dict(
             name='div',
             attrs={'id': lambda x: x and x.startswith('inside_post_content_ad')}
@@ -91,7 +92,7 @@ class livelaw(BasicNewsRecipe):
     def is_accepted_entry(self, entry):
         # Those sections in the top nav bar that we will omit
         omit_list = [
-            'videos', 'job-updates', 'events-corner', 'sponsored', 'hindi.livelaw.in'
+            'videos', 'job-updates', 'events-corner', 'sponsored', 'hindi.livelaw.in', 'javascript:void(0);',
         ]
         is_accepted = True
         for omit_entry in omit_list:
diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe
index 26cd09a29e..a112968816 100644
--- a/recipes/livemint.recipe
+++ b/recipes/livemint.recipe
@@ -13,7 +13,7 @@ class LiveMint(BasicNewsRecipe):
    title = u'Live Mint'
    description = 'Financial News from India.'
    language = 'en_IN'
-    __author__ = 'Krittika Goyal'
+    __author__ = 'Krittika Goyal, revised by unkn0wn'
    oldest_article = 1.15  # days
    max_articles_per_feed = 50
    encoding = 'utf-8'
@@ -48,6 +48,11 @@ class LiveMint(BasicNewsRecipe):
            ('How to Lounge','https://lifestyle.livemint.com/rss/how-to-lounge'),
            ('Smart Living','https://lifestyle.livemint.com/rss/smart-living'),
        ]
+
+        def preprocess_html(self, soup):
+            for img in soup.findAll('img', attrs={'data-img': True}):
+                img['src'] = img['data-img']
+            return soup
    else:
        # some wsj articles wont load
        extra_css = '''
@@ -90,31 +95,29 @@ class LiveMint(BasicNewsRecipe):
            ('Elections', 'https://www.livemint.com/rss/elections'),
        ]
 
-        def preprocess_raw_html(self, raw, *a):
-            if '<script>var wsjFlag=true;</script>' in raw:
-                m = re.search(r'type="application/ld\+json">[^<]+?"@type": "NewsArticle"', raw)
-                raw1 = raw[m.start():]
-                raw1 = raw1.split('>', 1)[1].strip()
-                data = json.JSONDecoder().raw_decode(raw1)[0]
-                value = data['hasPart']['value']
-                body = data['articleBody'] + '</p><p>' + re.sub(r'([a-z]\.|[0-9]\.)([A-Z])', r'\1</p><p>\2', value)
-                body = '<p>' + body + '</p>'
-                raw = re.sub(r'<p>([^}]*)</p>', body, raw)
-                return raw
-            else:
-                return raw
+        def preprocess_raw_html(self, raw, *a):
+            if '<script>var wsjFlag=true;</script>' in raw:
+                m = re.search(r'type="application/ld\+json">[^<]+?"@type": "NewsArticle"', raw)
+                raw1 = raw[m.start():]
+                raw1 = raw1.split('>', 1)[1].strip()
+                data = json.JSONDecoder().raw_decode(raw1)[0]
+                value = data['hasPart']['value']
+                body = data['articleBody'] + '</p><p>'\
+                    + re.sub(r'(([a-z]|[^A-Z])\.|\.”)([A-Z]|“[A-Z])', r'\1</p><p>\3', value)
+                body = '<p>' + body + '</p>'
+                raw = re.sub(r'<p>([^}]*)</p>', body, raw)
+                return raw
+            else:
+                return raw
 
-        def preprocess_html(self, soup):
-            for span in soup.findAll('figcaption'):
-                span['id'] = 'img-cap'
-            for auth in soup.findAll('span', attrs={'class':['articleInfo pubtime','articleInfo author']}):
-                auth['id'] = 'auth-info'
-                auth.name = 'div'
-            for span in soup.findAll('span', attrs={'class':'exclusive'}):
-                span.extract()
-            for img in soup.findAll('img', attrs={'data-src': True}):
-                img['src'] = img['data-src']
-            if is_saturday:
-                for img in soup.findAll('img', attrs={'data-img': True}):
-                    img['src'] = img['data-img']
-            return soup
+        def preprocess_html(self, soup):
+            for span in soup.findAll('figcaption'):
+                span['id'] = 'img-cap'
+            for auth in soup.findAll('span', attrs={'class':['articleInfo pubtime','articleInfo author']}):
+                auth['id'] = 'auth-info'
+                auth.name = 'div'
+            for span in soup.findAll('span', attrs={'class':'exclusive'}):
+                span.extract()
+            for img in soup.findAll('img', attrs={'data-src': True}):
+                img['src'] = img['data-src']
+            return soup