mirror of
https://github.com/kovidgoyal/calibre.git
synced 2026-04-01 23:02:27 -04:00
Update Naked Capitalism
This commit is contained in:
parent
f23369204d
commit
2c9af589ed
@ -1,43 +1,47 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
class NakedCapitalism(BasicNewsRecipe):
    """Recipe that builds an e-book from the Naked Capitalism RSS feed.

    Articles come from the first two pages of the site feed (see
    ``get_feeds``).  Each downloaded page is reduced to its article
    container, stripped of navigation/ads/sharing widgets, and cleaned of
    inline styling in ``preprocess_html``.
    """

    title = 'Naked Capitalism'
    __author__ = 'PaulB223'
    language = 'en_US'
    oldest_article = 7
    # The original text assigned this twice (50, then 100); only the last
    # assignment is ever visible on the class, so that value is kept.
    max_articles_per_feed = 100
    auto_cleanup = True

    # One download at a time with a pause between requests.
    simultaneous_downloads = 1
    delay = 5.0

    disable_header = True
    fetch_masthead = False
    no_stylesheets = True
    remove_javascript = True

    # Base feed list; get_feeds() extends it with the second feed page.
    feeds = [
        ('Naked Capitalism', 'https://www.nakedcapitalism.com/feed'),
    ]

    # Candidate containers for the article body.
    keep_only_tags = [
        dict(name='article'),
        dict(attrs={'class': lambda x: x and 'post-content' in x.split()}),
        dict(attrs={'class': 'entry-content'}),
        dict(attrs={'class': 'post-content'}),
        dict(id='content'),
    ]

    # Page furniture to drop.
    # NOTE(review): the class test is a case-insensitive *substring* match, so
    # 'ads' also hits classes such as 'downloads' and 'ad-' hits 'thread-...'.
    # Confirm against the live markup before tightening it.
    remove_tags = [
        dict(name=['nav', 'header', 'footer', 'aside', 'svg', 'button', 'script', 'style']),
        dict(attrs={'class': lambda x: x and any(
            c in x.lower()
            for c in ['sidebar', 'ads', 'ad-', 'share', 'donation', 'related', 'comments']
        )}),
    ]

    # Force a plain serif layout and undo any fixed-size/hidden containers.
    extra_css = '''
        body { font-family: serif !important; color: black !important; }
        p { display: block !important; margin-bottom: 1em !important; line-height: 1.4 !important; }
        div, article, section {
            width: auto !important;
            height: auto !important;
            overflow: visible !important;
            display: block !important;
        }
    '''

    def get_feeds(self):
        """Return the ``(title, url)`` pairs to fetch.

        The result is ``feeds`` followed by the second page of the same feed,
        so the first-page URL is defined in exactly one place.
        """
        return list(self.feeds) + [
            ('Naked Capitalism (p2)', 'https://www.nakedcapitalism.com/feed?paged=2'),
        ]

    def preprocess_html(self, soup):
        """Remove teaser links, embedded content and inline styling.

        :param soup: parsed article page; it is modified in place.
        :return: the same ``soup`` object.
        """
        teaser_phrases = ('Read more', 'Continue reading')

        def is_teaser(text):
            # ``text`` may be None/empty for anchors without a single string.
            return bool(text) and any(phrase in text for phrase in teaser_phrases)

        for link in soup.findAll('a', text=is_teaser):
            link.decompose()

        for tag in soup.findAll(['script', 'style', 'iframe']):
            tag.decompose()

        # Drop per-element styling and responsive image candidates from every
        # remaining tag.
        for tag in soup.findAll(True):
            for attr in ('style', 'srcset'):
                if tag.has_attr(attr):
                    del tag[attr]
        return soup

    def get_browser(self):
        """Return the base recipe browser configured with desktop-browser headers.

        robots.txt handling is switched off and the default header list is
        replaced wholesale with the one below.
        """
        br = BasicNewsRecipe.get_browser(self)
        br.set_handle_robots(False)
        # NOTE(review): 'br' (brotli) is advertised in Accept-Encoding; confirm
        # the browser can actually decode brotli responses, otherwise drop it.
        br.addheaders = [
            ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36 Edg/145.0.0.0'),
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
            ('Accept-Language', 'en-US,en;q=0.5'),
            ('Accept-Encoding', 'gzip, deflate, br'),
            ('Connection', 'keep-alive'),
        ]
        return br
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user