# calibre/recipes/forbes.recipe
# 2020-03-15 18:57:40 +05:30
#
# 67 lines
# 2.2 KiB
# Plaintext

from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build tag-matching keyword arguments for a set of CSS class names.

    ``classes`` is a whitespace-separated string of class names. The result
    is a dict of the form ``{'attrs': {'class': predicate}}``; it is used in
    this file both as an entry in ``keep_only_tags`` / ``remove_tags`` and,
    unpacked with ``**``, as arguments to ``soup.find()``.

    The predicate receives an element's ``class`` attribute value. It returns
    the value unchanged when it is falsy (``None`` or ``''``), otherwise the
    frozenset of class names shared with the requested ones, which is empty
    (falsy) when nothing matches.
    """
    # Split on any run of whitespace, the same way the attribute value is
    # split inside the predicate, so tabs, newlines or repeated spaces in the
    # spec cannot produce tokens that never match.
    q = frozenset(classes.split())
    return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
class Forbes(BasicNewsRecipe):
    """Calibre news recipe that downloads articles from Forbes RSS feeds."""

    title = u'Forbes'
    description = 'Business and Financial News'
    __author__ = 'Kovid Goyal'
    oldest_article = 30  # presumably days, per BasicNewsRecipe -- confirm
    max_articles_per_feed = 20
    language = 'en'
    encoding = 'utf-8'
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True

    # Styling for captioned images. The CSS text is kept flush-left so the
    # string value is exactly what it was before.
    extra_css = '''
div.fb-captioned-img {
font-size: smaller;
margin-top: 1em; margin-bottom: 1em;
}
div.fb-captioned-img img {
display:block;
margin-left: auto; margin-right: auto;
}
'''

    # (feed title, feed URL) pairs.
    feeds = [
        (u'Latest', u'https://www.forbes.com/news/index.xml'),
        (u'Most Popular', u'https://www.forbes.com/feeds/popstories.xml'),
        (u'Technology', u'https://www.forbes.com/technology/index.xml'),
        (u'Business', u'https://www.forbes.com/business/index.xml'),
        (u'Sports Money', u'https://www.forbes.com/sportsmoney/index.xml'),
        (u'Leadership', u'https://www.forbes.com/leadership/index.xml'),
    ]

    # Tag specs selecting elements by CSS class: headline, hero image,
    # article body and the contributor block at the bottom.
    keep_only_tags = [
        classes('article-headline-container hero-image-block article-body bottom-contrib-block')
    ]

    # Tag specs for sharing widgets and every <button> element.
    remove_tags = [
        classes('article-sharing'),
        dict(name='button'),
    ]

    def preprocess_html(self, soup):
        """Move the hero image block to the end of the headline container.

        The move is done only when both elements are present. Previously a
        page with a hero image but no headline container had the image
        detached and then failed with ``AttributeError`` on ``None.append``.

        Returns the same ``soup`` object, modified in place.
        """
        hero = soup.find(**classes('hero-image-block'))
        headline = soup.find(**classes('article-headline-container'))
        if hero is not None and headline is not None:
            hero.extract()
            headline.append(hero)
        return soup

    def get_browser(self):
        """Return the base recipe's browser with two forbes.com cookies set."""
        br = BasicNewsRecipe.get_browser(self)
        # NOTE(review): presumably these make Forbes skip its welcome/ad
        # interstitial page -- confirm against the live site.
        br.set_cookie('dailyWelcomeCookie', 'true', '.forbes.com')
        br.set_cookie('welcomeAd', 'true', '.forbes.com')
        return br

    # Debugging aid (disabled): presumably uncommented to fetch one fixed
    # test article rather than the feeds above.
    # def parse_index(self):
    #     return [('Articles', [{'title':'Test', 'url':
    #         'http://www.forbes.com/sites/hamdiraini/2016/04/25/bazin-seeks-startups-to-accelerate-accorhotels-transformation/'}])]