Update Naked Capitalism

This commit is contained in:
Kovid Goyal 2026-03-21 17:25:54 +05:30
parent f23369204d
commit 2c9af589ed
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -1,43 +1,47 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from calibre.web.feeds.news import BasicNewsRecipe
class NakedCapitalism(BasicNewsRecipe):
    """Calibre news recipe that downloads articles from nakedcapitalism.com.

    Articles are discovered through the site's RSS feed (first two pages),
    reduced to the main article container, and stripped of navigation,
    advertising, sharing widgets, inline styles and scripts.
    """

    title = 'Naked Capitalism'
    __author__ = 'PaulB223'
    language = 'en_US'

    # Feed limits.
    oldest_article = 7
    max_articles_per_feed = 100

    # Download politely: one connection at a time with a pause between
    # requests.
    simultaneous_downloads = 1
    delay = 5.0

    auto_cleanup = True
    no_stylesheets = True
    remove_javascript = True
    # NOTE(review): confirm that BasicNewsRecipe actually honours the two
    # options below; they are kept as-is from the original recipe.
    disable_header = True
    fetch_masthead = False

    # Declared as a plain attribute so the inherited get_feeds() serves it;
    # a hard-coded get_feeds() override would make this attribute dead.
    feeds = [
        ('Naked Capitalism', 'https://www.nakedcapitalism.com/feed'),
        ('Naked Capitalism (p2)', 'https://www.nakedcapitalism.com/feed?paged=2'),
    ]

    # Candidate containers for the article body.
    keep_only_tags = [
        dict(name='article'),
        # Token match on the class value; this also covers an element whose
        # class is exactly 'post-content'.
        dict(attrs={'class': lambda x: x and 'post-content' in x.split()}),
        dict(attrs={'class': 'entry-content'}),
        dict(id='content'),
    ]

    remove_tags = [
        dict(name=['nav', 'header', 'footer', 'aside', 'svg', 'button', 'script', 'style']),
        # Case-insensitive *substring* match, so e.g. 'ads' also hits a class
        # such as 'uploads'.
        dict(attrs={'class': lambda x: x and any(
            c in x.lower()
            for c in ['sidebar', 'ads', 'ad-', 'share', 'donation', 'related', 'comments']
        )}),
    ]

    extra_css = '''
        body { font-family: serif !important; color: black !important; }
        p { display: block !important; margin-bottom: 1em !important; line-height: 1.4 !important; }
        div, article, section {
            width: auto !important;
            height: auto !important;
            overflow: visible !important;
            display: block !important;
        }
    '''

    def preprocess_html(self, soup):
        """Strip teaser links, embedded code/frames and presentational attributes.

        :param soup: parsed article document (BeautifulSoup tree), modified
            in place.
        :return: the same ``soup`` object.
        """
        teaser_phrases = ('Read more', 'Continue reading')

        def is_teaser(text):
            # ``text`` is None for links without a single string child.
            return bool(text) and any(phrase in text for phrase in teaser_phrases)

        for link in soup.findAll('a', text=is_teaser):
            link.decompose()

        for tag in soup.findAll(['script', 'style', 'iframe']):
            tag.decompose()

        # Drop inline styling and responsive-image candidates from every
        # remaining element.
        for tag in soup.findAll(True):
            for attr in ('style', 'srcset'):
                if tag.has_attr(attr):
                    del tag[attr]
        return soup

    def get_browser(self, *args, **kwargs):
        """Return a browser that ignores robots.txt and sends desktop-browser headers.

        Positional and keyword arguments are forwarded unchanged to
        ``BasicNewsRecipe.get_browser``.
        """
        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        br.set_handle_robots(False)
        # Replaces the browser's default header list entirely.
        # NOTE(review): Accept-Encoding advertises brotli ('br'); confirm the
        # browser can decode such responses.
        br.addheaders = [
            ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36 Edg/145.0.0.0'),
            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
            ('Accept-Language', 'en-US,en;q=0.5'),
            ('Accept-Encoding', 'gzip, deflate, br'),
            ('Connection', 'keep-alive'),
        ]
        return br