__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic '
'''
financialexpress.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


def classes(classes):
    """Build a tag matcher keyed on CSS class names.

    ``classes`` is a space-separated string of class names. The result is a
    dict with a single ``attrs`` entry whose ``class`` predicate returns a
    truthy value when the inspected class attribute shares at least one
    name with the requested ones.
    """
    wanted = frozenset(classes.split(' '))

    def shares_class(value):
        # A missing or empty class attribute is handed back as-is (falsy).
        if not value:
            return value
        return frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': shares_class}}


class FE_India(BasicNewsRecipe):
    """Recipe settings and hooks for The Financial Express."""

    # Descriptive metadata.
    title = 'The Financial Express'
    __author__ = 'Darko Miletic'
    description = 'Financial news from India'
    publisher = 'The Indian Express Limited'
    category = 'news, politics, finances, India'
    language = 'en_IN'
    publication_type = 'magazine'

    # Fetch settings.
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_empty_feeds = True
    ignore_duplicate_articles = {'url'}

    # Mirrors the metadata declared above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Article clean-up rules, expressed with the module-level classes() helper.
    keep_only_tags = [
        classes(
            'wp-block-post-title wp-block-post-excerpt ie-network-post-meta-wrapper'
            ' wp-block-post-featured-image wp-block-post-content'
        ),
    ]
    remove_tags = [
        classes('parent_also_read'),
    ]
    remove_attributes = ['width', 'height']

    # (section title, feed URL) pairs.
    feeds = [
        # https://www.financialexpress.com/syndication/
        # Print feeds
        ('Front Page', 'https://www.financialexpress.com/print/front-page/feed/'),
        ('Corporate Markets', 'https://www.financialexpress.com/print/corporate-markets/feed/'),
        ('Economy', 'https://www.financialexpress.com/print/economy-print/feed/'),
        ('Opinion', 'https://www.financialexpress.com/print/edits-columns/feed/'),
        ('personal Finance', 'https://www.financialexpress.com/print/personal-finance-print/feed/'),
        # ('Brandwagon', 'https://www.financialexpress.com/print/brandwagon/feed/'),
        # Other Feeds
        ('Economy', 'https://www.financialexpress.com/economy/feed/'),
        ('Banking & finance', 'https://www.financialexpress.com/industry/banking-finance/feed/'),
        ('Opinion', 'https://www.financialexpress.com/opinion/feed/'),
        ('Editorial', 'https://www.financialexpress.com/editorial/feed/'),
        ('Budget', 'https://www.financialexpress.com/budget/feed/'),
        ('Industry', 'https://www.financialexpress.com/industry/feed/'),
        ('Market', 'https://www.financialexpress.com/market/feed/'),
        ('Jobs', 'https://www.financialexpress.com/jobs/feed/'),
        ('SME', 'https://www.financialexpress.com/industry/sme/feed/'),
        ('Mutual Funds', 'https://www.financialexpress.com/money/mutual-funds/feed/'),
        ('Health', 'https://www.financialexpress.com/lifestyle/health/feed'),
        # ('Health Care','https://www.financialexpress.com/healthcare/feed'),
        ('Science', 'https://www.financialexpress.com/lifestyle/science/feed'),
        ('Infrastructure', 'https://www.financialexpress.com/infrastructure/feed'),
        ('Money', 'https://www.financialexpress.com/money/feed'),
    ]

    def get_cover_url(self):
        """Return the cover image URL found on the magzter.com page.

        The URL is the ``content`` value of the first ``<meta>`` tag whose
        ``content`` ends with ``view/3.jpg``. ``None`` is returned when the
        page has no such tag.
        """
        def is_cover(content):
            return content and content.endswith('view/3.jpg')

        page = self.index_to_soup(
            'https://www.magzter.com/IN/The-Indian-Express-Ltd./Financial-Express-Mumbai/Business/'
        )
        candidates = page.findAll('meta', content=is_cover)
        if candidates:
            return candidates[0]['content']
        return None

    def preprocess_html(self, soup, *a):
        """Copy every element's ``data-src`` value into ``src``; return the soup."""
        lazy_tags = soup.findAll(attrs={'data-src': True})
        for tag in lazy_tags:
            tag['src'] = tag['data-src']
        return soup