hindutamil.recipe

2025-08-30 23:00:21 -04:00 · 2023-07-02 12:38:41 +05:30 · 2023-07-02 12:38:41 +05:30 · 09df62c428
commit 09df62c428
parent 3d2287ef49
2 changed files with 73 additions and 0 deletions
--- a/recipes/hindutamil.recipe
+++ b/recipes/hindutamil.recipe
@ -0,0 +1,73 @@
+from calibre.ptempfile import PersistentTemporaryFile
+from calibre.web.feeds.news import BasicNewsRecipe, classes
+
+class hindutamil(BasicNewsRecipe):
+    title = 'இந்து தமிழ் திசை'
+    __author__ = 'unkn0wn'
+    description = (
+        'Hindu Tamil Thisai stands differentiated from the rest of the language dailies in Tamil Nadu '
+        'through its unbiased news coverage, in-depth analysis of international, national and local issues.'
+    )
+    no_stylesheets = True
+    use_embedded_content = False
+    encoding = 'utf-8'
+    language = 'ta'
+    remove_attributes = ['style', 'height', 'width']
+    masthead_url = 'https://static.hindutamil.in/hindu/static/store/images/logo.png'
+    
+    def get_browser(self):
+        return BasicNewsRecipe.get_browser(self, user_agent='common_words/based') 
+    
+    keep_only_tags = [
+        classes('main-article')
+    ]
+    
+    remove_tags = [
+        classes('newsbot-ads article-details-ads-inner art-follow-title1 dont-miss-it')
+    ]
+        
+    ignore_duplicate_articles = {'title'}
+    remove_empty_feeds = True    
+
+    articles_are_obfuscated = True
+
+    def get_obfuscated_article(self, url):
+        br = self.get_browser()
+        try:
+            br.open(url)
+        except Exception as e:
+            url = e.hdrs.get('location')
+        soup = self.index_to_soup(url)
+        link = soup.find('a', href=True)
+        skip_sections =[ # add sections you want to skip
+            '/video/', '/videos/', '/media/'
+        ]
+        if any(x in link['href'] for x in skip_sections):
+            self.log('Aborting Article ', link['href'])
+            self.abort_article('skipping video links')
+
+        self.log('Downloading ', link['href'])
+        html = br.open(link['href']).read()
+        pt = PersistentTemporaryFile('.html')
+        pt.write(html)
+        pt.close()
+        return pt.name
+
+    feeds = []
+
+    sections = [
+        ('தமிழகம்', 'tamilnadu'),
+        ('இந்தியா', 'india'),
+        ('கருத்துப் பேழை', 'opinion'),
+        ('உலகம்', 'world'),
+        ('வணிகம்', 'business'),
+        # ('விளையாட்டு', 'sports'),
+        # ('தமிழ் சினிமா', 'cinema'),
+        ('தொழில்நுட்பம்', 'technology'),
+        # ('இணைப்பிதழ்கள்', 'supplements'),
+    ]
+
+    for sec in sections:
+        a = 'https://news.google.com/rss/search?q=when:27h+allinurl:hindutamil.in%2Fnews{}&hl=ta-IN&gl=IN&ceid=IN:ta'
+        feeds.append((sec[0], a.format('%2F' + sec[1] + '%2F')))
+    # feeds.append(('Others', a.format('')))
--- a/recipes/icons/hindutamil.png
+++ b/recipes/icons/hindutamil.png