__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic '
'''
financialexpress.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


def classes(classes):
    """Build a tag matcher keyed on CSS class names.

    *classes* is a string of class names separated by single spaces.
    The result is a dict with one ``attrs`` entry whose ``'class'`` value
    is a predicate: given a tag's class attribute string it returns a
    truthy value when that string shares at least one name with
    *classes*, and a falsy value when the attribute is missing/empty or
    nothing overlaps.
    """
    q = frozenset(classes.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})


class FE_India(BasicNewsRecipe):
    """Recipe for The Financial Express, driven by the site's RSS feeds."""

    title = 'The Financial Express'
    __author__ = 'Darko Miletic'
    description = 'Financial news from India'
    publisher = 'The Indian Express Limited'
    category = 'news, politics, finances, India'
    oldest_article = 30
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    language = 'en_IN'
    remove_empty_feeds = True
    publication_type = 'magazine'

    # Metadata passed to the conversion step; reuses the values declared above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Keep only elements carrying one of these CSS classes.
    keep_only_tags = [classes('post-title place-line leftstory')]
    remove_attributes = ['width', 'height']

    # (section title, feed URL) pairs.
    feeds = [
        ('Latest news', 'https://www.financialexpress.com/feed/'),
        ('Economy', 'https://www.financialexpress.com/economy/feed/'),
        ('Industry', 'https://www.financialexpress.com/industry/feed/'),
        ('Banking & finance', 'https://www.financialexpress.com/industry/banking-finance/feed/'),
        ('Companies', 'https://www.financialexpress.com/industry/companies/feed/'),
        ('Jobs', 'https://www.financialexpress.com/industry/jobs/feed/'),
        ('Tech', 'https://www.financialexpress.com/industry/tech/feed/'),
        ('Lifestyle', 'https://www.financialexpress.com/industry/lifestyle/feed/'),
        ('Health', 'https://www.financialexpress.com/industry/health/feed/'),
        ('Science', 'https://www.financialexpress.com/industry/science/feed/'),
        ('Sports', 'https://www.financialexpress.com/industry/sports/feed/'),
        ('Fe Columnist', 'https://www.financialexpress.com/industry/fe-columnist/feed/'),
    ]

    def preprocess_html(self, soup, *a):
        """Copy every ``data-src`` attribute into ``src`` and return *soup*.

        Applies to any element that has a ``data-src`` attribute, not only
        ``<img>`` tags. Extra positional arguments are accepted and ignored.
        """
        # NOTE(review): presumably the site lazy-loads images via data-src,
        # leaving the real URL out of src -- confirm against live markup.
        for img in soup.findAll(attrs={'data-src': True}):
            img['src'] = img['data-src']
        return soup