# Mirror of https://github.com/kovidgoyal/calibre.git
# (synced 2025-06-23 15:30:45 -04:00; 67 lines, 2.2 KiB, Plaintext)
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
def classes(classes):
    """Build ``find()`` keyword arguments matching any of the given CSS classes.

    ``classes`` is a space-separated string of class names. The returned
    dict has a single ``attrs`` entry whose ``class`` value is a predicate:
    given an element's class attribute string it returns the (possibly
    empty) frozenset of names shared with ``classes``, or the falsy input
    itself when the attribute is missing or empty.
    """
    q = frozenset(classes.split(' '))

    def matches(x):
        # Short-circuit on None/'' so elements without a class attribute
        # are rejected without calling split() on a non-string.
        return x and frozenset(x.split()).intersection(q)

    return dict(attrs={'class': matches})
|
|
|
|
|
|
class Forbes(BasicNewsRecipe):
    """News recipe that downloads Forbes articles from the forbes.com feeds."""

    # Recipe metadata and download options read by BasicNewsRecipe.
    title = u'Forbes'
    description = 'Business and Financial News'
    __author__ = 'Kovid Goyal'
    oldest_article = 30
    max_articles_per_feed = 20
    language = 'en'
    encoding = 'utf-8'
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}
    remove_empty_feeds = True

    # Styling for captioned images: smaller caption text, centred image.
    extra_css = '''
    div.fb-captioned-img {
        font-size: smaller;
        margin-top: 1em; margin-bottom: 1em;
    }
    div.fb-captioned-img img {
        display:block;
        margin-left: auto; margin-right: auto;
    }
    '''

    # (section title, feed URL) pairs.
    feeds = [
        (u'Latest', u'https://www.forbes.com/news/index.xml'),
        (u'Most Popular', u'https://www.forbes.com/feeds/popstories.xml'),
        (u'Technology', u'https://www.forbes.com/technology/index.xml'),
        (u'Business', u'https://www.forbes.com/business/index.xml'),
        (u'Sports Money', u'https://www.forbes.com/sportsmoney/index.xml'),
        (u'Leadership', u'https://www.forbes.com/leadership/index.xml'),
    ]

    # Only elements carrying one of these classes are kept from each page.
    keep_only_tags = [
        classes('article-headline-container hero-image-block article-body bottom-contrib-block')
    ]

    # Elements stripped from the kept content.
    remove_tags = [
        classes('article-sharing'),
        dict(name='button'),
    ]

    def preprocess_html(self, soup):
        """Move the hero image block to the end of the headline container.

        The soup is modified in place and returned. Nothing is moved unless
        both the hero image block and the headline container are present.
        """
        hero = soup.find(**classes('hero-image-block'))
        if hero is not None:
            headline = soup.find(**classes('article-headline-container'))
            # Check the destination exists before detaching the hero block;
            # otherwise a page without a headline container would raise
            # AttributeError and the image would already be removed.
            if headline is not None:
                hero.extract()
                headline.append(hero)
        return soup

    def get_browser(self):
        """Return the base recipe browser with two forbes.com cookies set."""
        br = BasicNewsRecipe.get_browser(self)
        # NOTE(review): presumably these cookies skip the site's welcome/ad
        # interstitial page -- confirm against current site behaviour.
        br.set_cookie('dailyWelcomeCookie', 'true', '.forbes.com')
        br.set_cookie('welcomeAd', 'true', '.forbes.com')
        return br

    # Debugging aid: uncomment to fetch a single known article instead of
    # the feeds.
    # def parse_index(self):
    #     return [('Articles', [{'title':'Test', 'url':
    #         'http://www.forbes.com/sites/hamdiraini/2016/04/25/bazin-seeks-startups-to-accelerate-accorhotels-transformation/'}])]
|