calibre/recipes/fe_india.recipe
Kovid Goyal 9030c8cbd2
...
2022-05-02 12:00:41 +05:30

76 lines
3.4 KiB
Plaintext

__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
'''
financialexpress.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build an ``attrs`` matcher for elements having any of the given classes.

    ``classes`` is a space-separated string of class names. The result is a
    dict of the form ``{'attrs': {'class': predicate}}``. The predicate takes
    the value of a ``class`` attribute and returns the (possibly empty)
    frozenset of names it shares with ``classes``; a falsy value such as
    ``None`` or ``''`` is returned unchanged.
    """
    wanted = frozenset(classes.split(' '))

    def matches(value):
        # A missing or empty class attribute cannot match; hand it back as-is
        # so the result stays falsy.
        if not value:
            return value
        return frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': matches}}
class FE_India(BasicNewsRecipe):
    """Recipe for The Financial Express (financialexpress.com) RSS feeds."""

    # Publication metadata.
    title = 'The Financial Express'
    __author__ = 'Darko Miletic'
    description = 'Financial news from India'
    publisher = 'The Indian Express Limited'
    category = 'news, politics, finances, India'
    language = 'en_IN'
    publication_type = 'magazine'

    # Fetch settings.
    oldest_article = 2
    max_articles_per_feed = 200
    encoding = 'utf-8'
    use_embedded_content = False
    remove_empty_feeds = True
    ignore_duplicate_articles = {'url'}
    no_stylesheets = True

    # Conversion metadata, reusing the values declared above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Article elements to keep / drop, selected by CSS class name.
    keep_only_tags = [
        classes(
            'wp-block-post-title wp-block-post-excerpt '
            'ie-network-post-meta-wrapper wp-block-post-featured-image '
            'wp-block-post-content'
        )
    ]
    remove_tags = [classes('parent_also_read')]
    remove_attributes = ['width', 'height']

    feeds = [
        # https://www.financialexpress.com/syndication/
        # Print feeds
        ('Front Page', 'https://www.financialexpress.com/print/front-page/feed/'),
        ('Corporate Markets', 'https://www.financialexpress.com/print/corporate-markets/feed/'),
        ('Economy', 'https://www.financialexpress.com/print/economy-print/feed/'),
        ('Opinion', 'https://www.financialexpress.com/print/edits-columns/feed/'),
        ('personal Finance', 'https://www.financialexpress.com/print/personal-finance-print/feed/'),
        # ('Brandwagon', 'https://www.financialexpress.com/print/brandwagon/feed/'),
        # Other Feeds
        ('Economy', 'https://www.financialexpress.com/economy/feed/'),
        ('Banking & finance', 'https://www.financialexpress.com/industry/banking-finance/feed/'),
        ('Opinion', 'https://www.financialexpress.com/opinion/feed/'),
        ('Editorial', 'https://www.financialexpress.com/editorial/feed/'),
        ('Budget', 'https://www.financialexpress.com/budget/feed/'),
        ('Industry', 'https://www.financialexpress.com/industry/feed/'),
        ('Market', 'https://www.financialexpress.com/market/feed/'),
        ('Jobs', 'https://www.financialexpress.com/jobs/feed/'),
        ('SME', 'https://www.financialexpress.com/industry/sme/feed/'),
        ('Mutual Funds', 'https://www.financialexpress.com/money/mutual-funds/feed/'),
        ('Health', 'https://www.financialexpress.com/lifestyle/health/feed'),
        # ('Health Care','https://www.financialexpress.com/healthcare/feed'),
        ('Science', 'https://www.financialexpress.com/lifestyle/science/feed'),
        ('Infrastructure', 'https://www.financialexpress.com/infrastructure/feed'),
        ('Money', 'https://www.financialexpress.com/money/feed'),
    ]

    def get_cover_url(self):
        """Return the cover image URL found on the Magzter page, if any.

        The page is searched for ``<meta>`` tags whose ``content`` attribute
        ends with ``view/3.jpg``; the ``content`` of the first such tag is
        returned, or ``None`` when there is no match.
        """
        page = self.index_to_soup(
            'https://www.magzter.com/IN/The-Indian-Express-Ltd./Financial-Express-Mumbai/Business/'
        )

        def is_cover(content):
            return content and content.endswith('view/3.jpg')

        candidates = page.findAll('meta', content=is_cover)
        if not candidates:
            return None
        return candidates[0]['content']

    def preprocess_html(self, soup, *a):
        """Copy every ``data-src`` attribute into ``src`` and return the soup."""
        # NOTE(review): presumably the site lazy-loads images via data-src;
        # confirm against the live markup.
        for tag in soup.findAll(attrs={'data-src': True}):
            tag['src'] = tag['data-src']
        return soup