From 2c9af589ed9aaa96a91bc9cc4e8e4b5738e979e2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sat, 21 Mar 2026 17:25:54 +0530
Subject: [PATCH] Update Naked Capitalism

---
 recipes/nakedcapitalism.recipe | 70 ++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 33 deletions(-)

diff --git a/recipes/nakedcapitalism.recipe b/recipes/nakedcapitalism.recipe
index b6bcf7da4d..fd1d1f99fe 100644
--- a/recipes/nakedcapitalism.recipe
+++ b/recipes/nakedcapitalism.recipe
@@ -1,43 +1,47 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
 from calibre.web.feeds.news import BasicNewsRecipe
 
 
 class NakedCapitalism(BasicNewsRecipe):
     title = 'Naked Capitalism'
-    __author__ = 'PaulB223'
-    language = 'en_US'
     oldest_article = 7
-    max_articles_per_feed = 50
+    max_articles_per_feed = 100
+    auto_cleanup = True
 
-    simultaneous_downloads = 1
-    delay = 5.0
-
-    disable_header = True
-    fetch_masthead = False
-    no_stylesheets = True
-    remove_javascript = True
-
-    keep_only_tags = [
-        dict(name='article'),
-        dict(attrs={'class': lambda x: x and 'post-content' in x.split()}),
-        dict(attrs={'class': 'entry-content'}),
-        dict(attrs={'class': 'post-content'}),
-        dict(id='content')
-    ]
-
-    remove_tags = [
-        dict(name=['nav', 'header', 'footer', 'aside', 'svg', 'button', 'script', 'style']),
-        dict(attrs={'class': lambda x: x and any(c in x.lower() for c in ['sidebar', 'ads', 'ad-', 'share', 'donation', 'related', 'comments'])})
-    ]
-
-    def get_feeds(self):
-        return [
-            ('Naked Capitalism', 'https://www.nakedcapitalism.com/feed'),
-            ('Naked Capitalism (p2)', 'https://www.nakedcapitalism.com/feed?paged=2'),
-        ]
+    extra_css = '''
+        body { font-family: serif !important; color: black !important; }
+        p { display: block !important; margin-bottom: 1em !important; line-height: 1.4 !important; }
+        div, article, section {
+            width: auto !important;
+            height: auto !important;
+            overflow: visible !important;
+            display: block !important;
+        }
+    '''
 
     def preprocess_html(self, soup):
-        for link in soup.findAll('a', text=lambda x: x and 'Read more' in x):
-            link.decompose()
-        for link in soup.findAll('a', text=lambda x: x and 'Continue reading' in x):
-            link.decompose()
+        for tag in soup.findAll(['script', 'style', 'iframe']):
+            tag.decompose()
+        for tag in soup.findAll(True):
+            if tag.has_attr('style'):
+                del tag['style']
+            if tag.has_attr('srcset'):
+                del tag['srcset']
         return soup
+
+    def get_browser(self):
+        br = BasicNewsRecipe.get_browser(self)
+        br.set_handle_robots(False)
+        br.addheaders = [
+            ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36 Edg/145.0.0.0'),
+            ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
+            ('Accept-Language', 'en-US,en;q=0.5'),
+            ('Accept-Encoding', 'gzip, deflate, br'),
+            ('Connection', 'keep-alive'),
+        ]
+        return br
+
+    feeds = [
+        ('Naked Capitalism', 'https://www.nakedcapitalism.com/feed'),
+    ]