From 06325e93ba5c5e80817d1a4b8ad62a9ef6603a86 Mon Sep 17 00:00:00 2001
From: Shiva Prasad <shivapv@pm.me>
Date: Fri, 11 Jun 2021 02:51:22 +0530
Subject: [PATCH] Fix & improve The Hindu recipe

Summary:
	* Fix: loading of the lead image of articles
	* Fix: avoid duplication of the subheading
	* Add: article summary to show up in TOC
---
 recipes/hindu.recipe | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/recipes/hindu.recipe b/recipes/hindu.recipe
index b29b8f17ce..2ba4c55a4e 100644
--- a/recipes/hindu.recipe
+++ b/recipes/hindu.recipe
@@ -3,7 +3,7 @@ __license__ = 'GPL 3'
 __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
 
 from calibre.web.feeds.news import BasicNewsRecipe
-import string
+import string, re
 
 
 def classes(classes):
@@ -30,10 +30,26 @@ class TheHindu(BasicNewsRecipe):
     ]
 
     def preprocess_html(self, soup):
-        for img in soup.findAll('img', attrs={'data-src-template': True}):
-            img['src'] = img['data-src-template'].replace('BINARY/thumbnail', 'alternates/FREE_660')
+        img = soup.find('img', attrs={'class': 'lead-img'})
+        try:
+            src = img.parent.find('source').get('srcset')
+            img['src'] = re.sub(r'(ALTERNATES)/.+?/', r'\1/FREE_660/', src)
+        except (TypeError, AttributeError):
+            pass
+        # Remove duplicate intro
+        for h in soup.findAll('h2', attrs={'class': 'intro'})[1:]:
+            h.extract()
         return soup
 
+    def populate_article_metadata(self, article, soup, first):
+        try:
+            desc = soup.find('meta', attrs={'name': 'description'}).get('content')
+            if not desc.startswith('Todays paper'):
+                desc += '...' if len(desc) >= 199 else ''   # indicate truncation
+                article.text_summary = article.summary = desc
+        except AttributeError:
+            return
+
     def articles_from_soup(self, soup):
         ans = []
         div = soup.find('section', attrs={'id': 'section_'})