# calibre/recipes/fe_india.recipe

__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
financialexpress.com
'''

from calibre.web.feeds.news import BasicNewsRecipe


def classes(classes):
    '''
    Build an ``attrs`` matcher dict for a set of CSS class names.

    ``classes`` is a string of class names separated by single spaces.
    The result is ``{'attrs': {'class': predicate}}``. The predicate takes
    the value of an element's class attribute and returns the frozenset of
    names it shares with ``classes`` (empty, hence falsy, when none match).
    A falsy attribute value (``None`` or ``''``) is returned unchanged.
    '''
    wanted = frozenset(classes.split(' '))

    def shares_class(value):
        # Missing or empty class attribute: hand the falsy value back as-is.
        if not value:
            return value
        return frozenset(value.split()) & wanted

    return {'attrs': {'class': shares_class}}


class FE_India(BasicNewsRecipe):
    '''
    Recipe for The Financial Express (financialexpress.com).

    Articles are collected from the site's per-section feeds listed in
    ``feeds``; ``preprocess_html`` fills in ``src`` from ``data-src`` on
    every element that carries the latter.
    '''

    # --- Publication metadata -------------------------------------------
    title = 'The Financial Express'
    __author__ = 'Darko Miletic'
    description = 'Financial news from India'
    publisher = 'The Indian Express Limited'
    category = 'news, politics, finances, India'
    language = 'en_IN'
    publication_type = 'magazine'

    # --- Download settings ----------------------------------------------
    oldest_article = 30
    max_articles_per_feed = 200
    remove_empty_feeds = True
    use_embedded_content = False
    encoding = 'utf-8'
    no_stylesheets = True

    # Reuse the metadata declared above for the conversion step.
    conversion_options = dict(
        comment=description,
        tags=category,
        publisher=publisher,
        language=language,
    )

    # --- Article clean-up -----------------------------------------------
    # Matches elements whose class attribute includes any of these names.
    keep_only_tags = [classes('post-title place-line leftstory')]
    remove_attributes = ['width', 'height']

    # (section title, feed URL) pairs.
    feeds = [
        ('Latest news', 'https://www.financialexpress.com/feed/'),
        ('Economy', 'https://www.financialexpress.com/economy/feed/'),
        ('Industry', 'https://www.financialexpress.com/industry/feed/'),
        (
            'Banking & finance',
            'https://www.financialexpress.com/industry/banking-finance/feed/',
        ),
        (
            'Companies',
            'https://www.financialexpress.com/industry/companies/feed/',
        ),
        ('Jobs', 'https://www.financialexpress.com/industry/jobs/feed/'),
        ('Tech', 'https://www.financialexpress.com/industry/tech/feed/'),
        (
            'Lifestyle',
            'https://www.financialexpress.com/industry/lifestyle/feed/',
        ),
        ('Health', 'https://www.financialexpress.com/industry/health/feed/'),
        ('Science', 'https://www.financialexpress.com/industry/science/feed/'),
        ('Sports', 'https://www.financialexpress.com/industry/sports/feed/'),
        (
            'Fe Columnist',
            'https://www.financialexpress.com/industry/fe-columnist/feed/',
        ),
    ]

    def preprocess_html(self, soup, *a):
        '''
        Copy ``data-src`` into ``src`` on every element that has it.

        Presumably the site lazy-loads images and keeps the real URL in
        ``data-src`` (to be confirmed against the live markup). The soup is
        modified in place and returned; extra positional arguments are
        ignored.
        '''
        deferred = soup.findAll(attrs={'data-src': True})
        for tag in deferred:
            tag['src'] = tag['data-src']
        return soup