#!/usr/bin/env python
from calibre.web.feeds.news import BasicNewsRecipe, classes


class firstpost(BasicNewsRecipe):
    """Calibre news recipe that pulls one RSS feed per Firstpost section.

    The feed list is generated from ``sections``; each entry maps to
    ``https://www.firstpost.com/rss/<section>.xml``.
    """

    title = 'Firstpost'
    __author__ = 'unkn0wn'
    description = (
        'Firstpost.com will serve as a trusted guide to the crush of news and ideas around you.'
        ' With thoughtful analysis and fearless views our team of editors and writers will track'
        ' news in India and the world and provide a perspective that is reflective of a changing dynamic.'
    )
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    language = 'en_IN'
    remove_attributes = ['height', 'width', 'style']
    masthead_url = 'https://images.firstpost.com/wp-content/uploads/2016/03/FP-Logo.png?impolicy=website&width=600&height=60'
    max_articles_per_feed = 25
    remove_empty_feeds = True
    ignore_duplicate_articles = {'title', 'url'}

    extra_css = '''
        .category-name, .author-info { font-size:small; color:#202020; }
        .wp-caption-text { font-size:small; text-align:center; }
    '''

    # Keep only the article container and strip related-article/tag blocks and
    # inline SVGs from it.
    keep_only_tags = [
        classes('article-sect')
    ]

    remove_tags = [
        classes('art-rel-articles tags-wrap'),
        dict(name='svg'),
    ]

    # Section slugs as they appear in the RSS URL path.
    sections = [
        'india',
        'politics',
        'opinion',
        'explainers',
        'business',
        'world',
        'web-stories',
        'tech',
        'artandculture',
        'health',
        'health-supplement',
        # 'photos',
        'entertainment',
        'living',
        'education',
        'sports',
        'firstcricket',
    ]

    oldest_article = 1.2  # days

    # One (title, url) pair per section. Built with a comprehension so that no
    # loop variables are left behind as attributes of the recipe class. Only
    # the outermost iterable of a comprehension is evaluated in class scope,
    # which is why the URL template is inlined instead of being read from a
    # separate class-level name.
    feeds = [
        (sec.capitalize(), 'https://www.firstpost.com/rss/{}.xml'.format(sec))
        for sec in sections
    ]

    def preprocess_html(self, soup):
        """Normalize article markup before conversion and return the soup.

        * The first ``<h2 class="category-name">`` and the first
          ``<h2 class="inner-copy">`` are renamed to ``<p>`` (presumably so
          they are not rendered as headings -- confirm against the site
          markup).
        * Every ``<img>`` carrying a ``data-src`` attribute gets that value
          copied into ``src`` (looks like lazy-loaded images -- confirm).
        """
        if h2 := soup.find('h2', attrs={'class': 'category-name'}):
            h2.name = 'p'
        if h := soup.find('h2', attrs={'class': 'inner-copy'}):
            h.name = 'p'
        # find_all is the current Beautiful Soup 4 name for the legacy findAll.
        for img in soup.find_all('img', attrs={'data-src': True}):
            img['src'] = img['data-src']
        return soup