From cc32c4ac73863c8d3d379594963fdc729e17d2f0 Mon Sep 17 00:00:00 2001 From: "ilker m. sitki" Date: Wed, 4 Sep 2024 12:22:36 +0200 Subject: [PATCH] clean up content manually instead of using auto_cleanup 1) compress images 2) keep some tags and remove others Changes to be committed: modified: salzburger_nachrichten.recipe --- recipes/salzburger_nachrichten.recipe | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/recipes/salzburger_nachrichten.recipe b/recipes/salzburger_nachrichten.recipe index 064b7f69e6..63c6c88b77 100644 --- a/recipes/salzburger_nachrichten.recipe +++ b/recipes/salzburger_nachrichten.recipe @@ -8,8 +8,24 @@ class SalzburgerNachrichten(BasicNewsRecipe): max_articles_per_feed = 10 remove_javascript = True no_stylesheets = True - auto_cleanup = True remove_empty_feeds = True + compress_news_images = True + + keep_only_tags = [ + dict(name='h1', attrs={'class': 'article-title'}), + dict(name='div', attrs={'class': 'article-author'}), + dict(name='div', attrs={'class': 'article-publication-date'}), + dict(name='div', attrs={'class': 'media-normal'}), + dict(name='div', attrs={'class': 'article-body-text'}), + dict(name='div', attrs={'class': 'article-sections'}), + ] + + remove_tags = [ + dict(name='source'), + dict(name='figcaption'), + dict(name='img', attrs={'class': 'article-author__logo'}), + dict(name='p', attrs={'class': 'article-author__desktopprefix'}), + ] feeds = [ ('Salzburg', 'https://www.sn.at/salzburg/xml/rss'),