diff --git a/recipes/livemint.recipe b/recipes/livemint.recipe
index eb237de735..6bb2fe6595 100644
--- a/recipes/livemint.recipe
+++ b/recipes/livemint.recipe
@@ -1,4 +1,6 @@
-from calibre.web.feeds.news import BasicNewsRecipe
+#!/usr/bin/env python
+
+from calibre.web.feeds.news import BasicNewsRecipe, classes
 
 
 class LiveMint(BasicNewsRecipe):
@@ -7,26 +9,45 @@ class LiveMint(BasicNewsRecipe):
     __author__ = 'Krittika Goyal'
     oldest_article = 1  # days
     max_articles_per_feed = 50
+    encoding = 'utf-8'
     use_embedded_content = False
-    no_stylesheets = True
-    auto_cleanup = True
-    feeds = [
-        ('Companies','https://www.livemint.com/rss/companies'),
-        ('Opinion','https://www.livemint.com/rss/opinion'),
-        ('Money','https://www.livemint.com/rss/money'),
-        ('Politics','https://www.livemint.com/rss/politics'),
-        ('Science','https://www.livemint.com/rss/science'),
-        ('Industry','https://www.livemint.com/rss/industry'),
-        ('Lounge','https://www.livemint.com/rss/lounge'),
-        ('Education','https://www.livemint.com/rss/education'),
-        ('Sports','https://www.livemint.com/rss/sports'),
-        ('Technology','https://www.livemint.com/rss/technology'),
-        ('News','https://www.livemint.com/rss/news'),
-        ('Mutual Funds','https://www.livemint.com/rss/Mutual Funds'),
-        ('Markets','https://www.livemint.com/rss/markets'),
-        ('AI','https://www.livemint.com/rss/AI'),
-        ('Insurance','https://www.livemint.com/rss/insurance'),
-        ('Budget','https://www.livemint.com/rss/budget'),
-        ('Elections','https://www.livemint.com/rss/elections'),
+    remove_attributes = ['style', 'height', 'width']
+
+    keep_only_tags = [
+        dict(name='h1'),
+        dict(name='picture'),
+        dict(name='figcaption'),
+        classes('articleInfo FirstEle summary highlights paywall'),
     ]
 
+    remove_tags = [
+        classes(
+            'trendingSimilarHeight moreNews mobAppDownload label msgError msgOk'
+        )
+    ]
+
+    feeds = [
+        ('Companies', 'https://www.livemint.com/rss/companies'),
+        ('Opinion', 'https://www.livemint.com/rss/opinion'),
+        ('Money', 'https://www.livemint.com/rss/money'),
+        ('Economy', 'https://www.livemint.com/rss/economy/'),
+        ('Politics', 'https://www.livemint.com/rss/politics'),
+        ('Science', 'https://www.livemint.com/rss/science'),
+        ('Industry', 'https://www.livemint.com/rss/industry'),
+        ('Lounge', 'https://www.livemint.com/rss/lounge'),
+        ('Education', 'https://www.livemint.com/rss/education'),
+        ('Sports', 'https://www.livemint.com/rss/sports'),
+        ('Technology', 'https://www.livemint.com/rss/technology'),
+        ('News', 'https://www.livemint.com/rss/news'),
+        ('Mutual Funds', 'https://www.livemint.com/rss/Mutual Funds'),
+        ('Markets', 'https://www.livemint.com/rss/markets'),
+        ('AI', 'https://www.livemint.com/rss/AI'),
+        ('Insurance', 'https://www.livemint.com/rss/insurance'),
+        ('Budget', 'https://www.livemint.com/rss/budget'),
+        ('Elections', 'https://www.livemint.com/rss/elections'),
+    ]
+
+    def preprocess_html(self, soup):
+        for img in soup.findAll('img', attrs={'data-src': True}):
+            img['src'] = img['data-src']
+        return soup