The India Forum recipe

and fix for Live Mint.
2025-12-12 08:05:05 -05:00 · 2023-06-12 20:29:28 +05:30 · 2023-06-12 20:29:28 +05:30 · 20011f4b51
commit 20011f4b51
parent 15f68d97da
3 changed files with 71 additions and 2 deletions
--- a/recipes/icons/theindiaforum.png
+++ b/recipes/icons/theindiaforum.png
--- a/recipes/livemint.recipe
+++ b/recipes/livemint.recipe
@ -129,8 +129,6 @@ class LiveMint(BasicNewsRecipe):
                return raw

        def preprocess_html(self, soup):
-            for h2 in soup.find('h2'):
-                h2.name = 'p'
            for span in soup.findAll('figcaption'):
                span['id'] = 'img-cap'
            for auth in soup.findAll('span', attrs={'class':['articleInfo pubtime','articleInfo author']}):
--- a/recipes/theindiaforum.recipe
+++ b/recipes/theindiaforum.recipe
@ -0,0 +1,71 @@
+from calibre.web.feeds.news import BasicNewsRecipe, classes
+from datetime import datetime, timezone, timedelta
+from calibre.utils.date import parse_date
+
+class mains(BasicNewsRecipe):
+    title = 'The India Forum'
+    __author__ = 'unkn0wn'
+    description = (
+        'The India Forum is an independent online journal-magazine that seeks to widen and '
+        'deepen our conversations on the issues that concern us.'
+    )
+    language = 'en_IN'
+    encoding = 'utf-8'
+    ignore_duplicate_articles = {'url'}
+    remove_attributes = ['height', 'width', 'style']
+    no_stylesheets = True 
+    resolve_internal_links = True
+    remove_empty_feeds = True
+    use_embedded_content = False
+    oldest_article = 30  # days
+    masthead_url = 'https://www.theindiaforum.in/themes/the_india_forum/images/tif_logo.png'
+
+    keep_only_tags = [
+        classes('article-lead-container block-views-blockarticle-block-1'),
+        dict(name='section', attrs={'id':'article-author-top-container'}),
+        classes('block-field-blocknodearticlebody block-field-blocknodearticlefield-references')
+    ]
+
+    def parse_index(self):
+        soup = self.index_to_soup('https://www.theindiaforum.in/')
+        ul = soup.find('ul', attrs={'class':'float-left'}) 
+        
+        section_list = []
+       
+        for x in ul.findAll('a', href=True):
+            if '/podcast' in x['href']:
+                continue
+            section_list.append(
+                (self.tag_to_string(x).strip().replace('■','■ '), 'https://www.theindiaforum.in' + x['href'])
+            )
+       
+        feeds = []
+
+        for section in section_list:
+            section_title = section[0]
+            section_url = section[1]
+            self.log(section_title, section_url)
+            soup = self.index_to_soup(section_url)
+            articles = self.articles_from_soup(soup)
+            if articles:
+                feeds.append((section_title, articles))
+        return feeds
+    
+    def articles_from_soup(self, soup):
+        ans = []
+        for art in soup.findAll('div', attrs={'class':lambda x: x and 'views-col' in x.split()}):
+            h2 = art.find(['h2', 'h3'])
+            url = 'https://www.theindiaforum.in' + h2.a['href']
+            title = self.tag_to_string(h2).strip()
+            desc = ''
+            if summ := art.find(**classes('summary')):
+                desc = self.tag_to_string(summ)
+                if inline := summ.find(**classes('inline-date')):
+                    date = parse_date(self.tag_to_string(inline))
+                    today = (datetime.now(timezone.utc)).replace(microsecond=0)
+                    if (today - date) > timedelta(self.oldest_article):
+                        continue
+            self.log('\t', title, '\n\t', desc, '\n\t\t', url)
+            ans.append({'title': title, 'url': url, 'description': desc})
+        return ans
+