From de7f80653cd1a26bf0ec8064614c7c8b0b0ec815 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sun, 5 Jan 2025 14:46:25 +0530 Subject: [PATCH] ... --- recipes/substack.recipe | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/recipes/substack.recipe b/recipes/substack.recipe index bca1ef3d84..c27de6e180 100644 --- a/recipes/substack.recipe +++ b/recipes/substack.recipe @@ -20,7 +20,7 @@ import re from mechanize import Request -from calibre.web.feeds.news import BasicNewsRecipe +from calibre.web.feeds.news import BasicNewsRecipe, classes class Substack(BasicNewsRecipe): @@ -30,13 +30,23 @@ class Substack(BasicNewsRecipe): oldest_article = 7 language = 'en' max_articles_per_feed = 100 - auto_cleanup = True - auto_cleanup_keep = '//*[@class="subtitle"]' needs_subscription = 'optional' use_embedded_content = False masthead_url = 'https://substack.com/img/substack_wordmark.png' cover_url = 'https://substack.com/img/substack.png' - extra_css = '.captioned-image-container, .image-container {font-size: small;}' + extra_css = '.captioned-image-container, .image-container, .image-caption {font-size: small;}' + remove_empty_feeds = True + remove_attributes = ['style', 'height', 'width'] + no_stylesheets = True + + keep_only_tags = [ + classes('post-title post-subtitle subtitle available-content') + ] + + remove_tags = [ + dict(name=['svg', 'source']), + classes('subscribe-widget button-wrapper') + ] recipe_specific_options = { 'auths': { @@ -113,8 +123,4 @@ class Substack(BasicNewsRecipe): res = w for img in soup.findAll('img', attrs={'src': True}): img['src'] = re.sub(r'w_\d+', 'w_' + res, img['src']) - for src in soup.findAll(['source', 'svg']): - src.extract() - for but in soup.findAll(attrs={'class': ['button-wrapper']}): - but.extract() return soup