From 81639f749a310ac89f83db8472b932966a65567c Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 18 Aug 2022 17:50:25 +0530
Subject: [PATCH] Update Live Mint
---
recipes/livemint.recipe | 21 ++++++++++++++++++++-
1 file changed, 20 insertions(+), 1 deletion(-)
diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe
index b02f9ae343..26cd09a29e 100644
--- a/recipes/livemint.recipe
+++ b/recipes/livemint.recipe
@@ -1,8 +1,11 @@
#!/usr/bin/env python
-from calibre.web.feeds.news import BasicNewsRecipe, classes
+import json
+import re
from datetime import date
+from calibre.web.feeds.news import BasicNewsRecipe, classes
+
is_saturday = date.today().weekday() == 5
@@ -46,10 +49,12 @@ class LiveMint(BasicNewsRecipe):
('Smart Living','https://lifestyle.livemint.com/rss/smart-living'),
]
else:
+ # some WSJ articles won't load
extra_css = '''
#img-cap {font-size:small; text-align:center;}
#auth-info {font-size:small; text-align:center;}
.highlights {font-style:italic;}
+ .summary{font-style:italic; color:#404040;}
'''
cover_url = 'https://epsfs.hindustantimes.com/MINT/2022/04/05/Delhi/Delhi/5_01/1ec7ad14_01_mr.jpg'
@@ -85,6 +90,20 @@ class LiveMint(BasicNewsRecipe):
('Elections', 'https://www.livemint.com/rss/elections'),
]
+ def preprocess_raw_html(self, raw, *a):  # rebuilds part of the raw page from its embedded ld+json data; otherwise returns raw unchanged
+ if '' in raw:  # NOTE(review): the marker literal is empty as shown (always true) - presumably lost in extraction; confirm against upstream
+ m = re.search(r'type="application/ld\+json">[^<]+?"@type": "NewsArticle"', raw)  # find the ld+json block whose text declares "@type": "NewsArticle"
+ raw1 = raw[m.start():]  # NOTE(review): re.search returns None when nothing matches, so m.start() would raise AttributeError
+ raw1 = raw1.split('>', 1)[1].strip()  # drop everything through the first '>' so the text begins at the JSON payload
+ data = json.JSONDecoder().raw_decode(raw1)[0]  # raw_decode parses the leading JSON document and ignores whatever follows it
+ value = data['hasPart']['value']
+ body = data['articleBody'] + '
' + re.sub(r'([a-z]\.|[0-9]\.)([A-Z])', r'\1
\2', value)  # puts the separator between a "." after a lowercase letter or digit and the capital that follows
+ body = '
'  # NOTE(review): as shown this overwrites body with a bare newline - the literal looks truncated; confirm against upstream
+ raw = re.sub(r'([^}]*)
', body, raw)  # NOTE(review): body is used as a replacement template, so re.sub will interpret any backslash escapes in it
+ return raw
+ else:
+ return raw
+
def preprocess_html(self, soup):
for span in soup.findAll('figcaption'):
span['id'] = 'img-cap'