From 6245d7dcf9999e2d31e1576f15b6140bcf7c4968 Mon Sep 17 00:00:00 2001 From: "ilker m. sitki" Date: Wed, 4 Sep 2024 11:08:56 +0200 Subject: [PATCH 01/10] add initial commit for salzburger nachrichten Changes to be committed: new file: salzburger_nachrichten.recipe --- recipes/salzburger_nachrichten.recipe | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 recipes/salzburger_nachrichten.recipe diff --git a/recipes/salzburger_nachrichten.recipe b/recipes/salzburger_nachrichten.recipe new file mode 100644 index 0000000000..6b3f5cde20 --- /dev/null +++ b/recipes/salzburger_nachrichten.recipe @@ -0,0 +1,34 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from calibre.web.feeds.news import BasicNewsRecipe + +class SalzburgerNachrichten(BasicNewsRecipe): + title = 'Salzburger Nachrichten' + oldest_article = 1 + max_articles_per_feed = 10 + remove_javascript = True + no_stylesheets = True + auto_cleanup = True + remove_empty_feeds = True + + feeds = [ + ('Salzburg', 'https://www.sn.at/salzburg/xml/rss'), + ('Innenpolitik', 'https://www.sn.at/politik/innenpolitik/xml/rss'), + ('Weltpolitik', 'https://www.sn.at/politik/weltpolitik/xml/rss'), + ('Wirtschaft', 'https://www.sn.at/wirtschaft/xml/rss'), + ('Kultur', 'https://www.sn.at/kultur/xml/rss'), + ('Leben', 'https://www.sn.at/leben/xml/rss'), + ('Wetter', 'https://www.sn.at/wetter/xml/rss'), + ('Festspiele', 'https://www.sn.at/festspiele/xml/rss'), + ('Sport', 'https://www.sn.at/sport/xml/rss'), + ('Panorama', 'https://www.sn.at/panorama/xml/rss'), + ('Wissen', 'https://www.sn.at/wissen/xml/rss'), + ('Wochenende', 'https://www.sn.at/panorama/wissen/xml/rss'), + ('Kinderseite', 'https://www.sn.at/panorama/kinder/xml/rss'), + ('Jengeseite', 'https://www.sn.at/jungeseite/xml/rss'), + ('SNin', 'https://www.sn.at/snin/xml/rss'), + ('Chronik Oesterreich', 'https://www.sn.at/panorama/oesterreich/xml/rss'), + ('Chronik International', 'https://www.sn.at/panorama/international/xml/rss'), + 
('Medien', 'https://www.sn.at/panorama/medien/xml/rss'), + ('Briefe an die SN', 'https://www.sn.at/leserforum/xml/rss'), + ] From bdee5d1f7cba8d87958ad5b8a9e526be88e92c97 Mon Sep 17 00:00:00 2001 From: "ilker m. sitki" Date: Wed, 4 Sep 2024 11:16:22 +0200 Subject: [PATCH 02/10] =?UTF-8?q?add=20=C3=96=20instead=20of=20Oe=20in=20th?= =?UTF-8?q?e=20feed=20name?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changes to be committed: modified: salzburger_nachrichten.recipe --- recipes/salzburger_nachrichten.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/salzburger_nachrichten.recipe b/recipes/salzburger_nachrichten.recipe index 6b3f5cde20..a176f26157 100644 --- a/recipes/salzburger_nachrichten.recipe +++ b/recipes/salzburger_nachrichten.recipe @@ -27,7 +27,7 @@ class SalzburgerNachrichten(BasicNewsRecipe): ('Kinderseite', 'https://www.sn.at/panorama/kinder/xml/rss'), ('Jengeseite', 'https://www.sn.at/jungeseite/xml/rss'), ('SNin', 'https://www.sn.at/snin/xml/rss'), - ('Chronik Oesterreich', 'https://www.sn.at/panorama/oesterreich/xml/rss'), + ('Chronik Österreich', 'https://www.sn.at/panorama/oesterreich/xml/rss'), ('Chronik International', 'https://www.sn.at/panorama/international/xml/rss'), ('Medien', 'https://www.sn.at/panorama/medien/xml/rss'), ('Briefe an die SN', 'https://www.sn.at/leserforum/xml/rss'), From 21bebd621d253e06437e6969717408756159a99f Mon Sep 17 00:00:00 2001 From: "ilker m. 
sitki" Date: Wed, 4 Sep 2024 11:21:40 +0200 Subject: [PATCH 03/10] add two feeds and delete empty space Changes to be committed: modified: salzburger_nachrichten.recipe --- recipes/salzburger_nachrichten.recipe | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/recipes/salzburger_nachrichten.recipe b/recipes/salzburger_nachrichten.recipe index a176f26157..064b7f69e6 100644 --- a/recipes/salzburger_nachrichten.recipe +++ b/recipes/salzburger_nachrichten.recipe @@ -8,7 +8,7 @@ class SalzburgerNachrichten(BasicNewsRecipe): max_articles_per_feed = 10 remove_javascript = True no_stylesheets = True - auto_cleanup = True + auto_cleanup = True remove_empty_feeds = True feeds = [ @@ -31,4 +31,6 @@ class SalzburgerNachrichten(BasicNewsRecipe): ('Chronik International', 'https://www.sn.at/panorama/international/xml/rss'), ('Medien', 'https://www.sn.at/panorama/medien/xml/rss'), ('Briefe an die SN', 'https://www.sn.at/leserforum/xml/rss'), + ('Veranstaltungen', 'https://www.sn.at/veranstaltungen/xml/rss'), + ('Klimawandel', 'https://www.sn.at/panorama/klimawandel/xml/rss') ] From cc32c4ac73863c8d3d379594963fdc729e17d2f0 Mon Sep 17 00:00:00 2001 From: "ilker m. 
sitki" Date: Wed, 4 Sep 2024 12:22:36 +0200 Subject: [PATCH 04/10] cleanup content manually instead of auto 1-) compress images 2-) keep some tags and remove others Changes to be committed: modified: salzburger_nachrichten.recipe --- recipes/salzburger_nachrichten.recipe | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/recipes/salzburger_nachrichten.recipe b/recipes/salzburger_nachrichten.recipe index 064b7f69e6..63c6c88b77 100644 --- a/recipes/salzburger_nachrichten.recipe +++ b/recipes/salzburger_nachrichten.recipe @@ -8,8 +8,24 @@ class SalzburgerNachrichten(BasicNewsRecipe): max_articles_per_feed = 10 remove_javascript = True no_stylesheets = True - auto_cleanup = True remove_empty_feeds = True + compress_news_images = True + + keep_only_tags = [ + dict(name='h1', attrs={'class': 'article-title'}), + dict(name='div', attrs={'class': 'article-author'}), + dict(name='div', attrs={'class': 'article-publication-date'}), + dict(name='div', attrs={'class': 'media-normal'}), + dict(name='div', attrs={'class': 'article-body-text'}), + dict(name='div', attrs={'class': 'article-sections'}), + ] + + remove_tags = [ + dict(name='source'), + dict(name='figcaption'), + dict(name='img', attrs={'class': 'article-author__logo'}), + dict(name='p', attrs={'class': 'article-author__desktopprefix'}), + ] feeds = [ ('Salzburg', 'https://www.sn.at/salzburg/xml/rss'), From 710b09b341951c66b52562222adae730c6a72b8e Mon Sep 17 00:00:00 2001 From: "ilker m. 
sitki" Date: Wed, 4 Sep 2024 12:25:23 +0200 Subject: [PATCH 05/10] add language code Changes to be committed: modified: salzburger_nachrichten.recipe --- recipes/salzburger_nachrichten.recipe | 1 + 1 file changed, 1 insertion(+) diff --git a/recipes/salzburger_nachrichten.recipe b/recipes/salzburger_nachrichten.recipe index 63c6c88b77..72fa177aea 100644 --- a/recipes/salzburger_nachrichten.recipe +++ b/recipes/salzburger_nachrichten.recipe @@ -6,6 +6,7 @@ class SalzburgerNachrichten(BasicNewsRecipe): title = 'Salzburger Nachrichten' oldest_article = 1 max_articles_per_feed = 10 + language = 'de_AT' remove_javascript = True no_stylesheets = True remove_empty_feeds = True From e2c61dbeb0eab2ffcc971416602b835df6055285 Mon Sep 17 00:00:00 2001 From: "ilker m. sitki" Date: Wed, 4 Sep 2024 12:26:42 +0200 Subject: [PATCH 06/10] ignore duplicate articles Changes to be committed: modified: salzburger_nachrichten.recipe --- recipes/salzburger_nachrichten.recipe | 1 + 1 file changed, 1 insertion(+) diff --git a/recipes/salzburger_nachrichten.recipe b/recipes/salzburger_nachrichten.recipe index 72fa177aea..90d5cccda4 100644 --- a/recipes/salzburger_nachrichten.recipe +++ b/recipes/salzburger_nachrichten.recipe @@ -11,6 +11,7 @@ class SalzburgerNachrichten(BasicNewsRecipe): no_stylesheets = True remove_empty_feeds = True compress_news_images = True + ignore_duplicate_articles = {'url'} keep_only_tags = [ dict(name='h1', attrs={'class': 'article-title'}), From 494109bb67e2d8cdf0177d086e04c594edd70823 Mon Sep 17 00:00:00 2001 From: "ilker m. 
sitki" Date: Wed, 4 Sep 2024 12:35:10 +0200 Subject: [PATCH 07/10] add author name Changes to be committed: modified: salzburger_nachrichten.recipe --- recipes/salzburger_nachrichten.recipe | 1 + 1 file changed, 1 insertion(+) diff --git a/recipes/salzburger_nachrichten.recipe b/recipes/salzburger_nachrichten.recipe index 90d5cccda4..079697bf83 100644 --- a/recipes/salzburger_nachrichten.recipe +++ b/recipes/salzburger_nachrichten.recipe @@ -4,6 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class SalzburgerNachrichten(BasicNewsRecipe): title = 'Salzburger Nachrichten' + __author__ = 'ims' oldest_article = 1 max_articles_per_feed = 10 language = 'de_AT' From 1dd220e709ac18a7c4df640d9e6f29d174ae9344 Mon Sep 17 00:00:00 2001 From: "ilker m. sitki" Date: Wed, 4 Sep 2024 12:47:05 +0200 Subject: [PATCH 08/10] add cover url Changes to be committed: modified: salzburger_nachrichten.recipe --- recipes/salzburger_nachrichten.recipe | 1 + 1 file changed, 1 insertion(+) diff --git a/recipes/salzburger_nachrichten.recipe b/recipes/salzburger_nachrichten.recipe index 079697bf83..1b2ceb72f7 100644 --- a/recipes/salzburger_nachrichten.recipe +++ b/recipes/salzburger_nachrichten.recipe @@ -13,6 +13,7 @@ class SalzburgerNachrichten(BasicNewsRecipe): remove_empty_feeds = True compress_news_images = True ignore_duplicate_articles = {'url'} + cover_url = 'https://front.e-pages.dk/data/teasers/salzburgernachrichten/308/vector/ts1.jpg' keep_only_tags = [ dict(name='h1', attrs={'class': 'article-title'}), From 4e9f97d72cd3cf536f257d27ff61ca5786173091 Mon Sep 17 00:00:00 2001 From: "ilker m. 
sitki" Date: Wed, 4 Sep 2024 13:46:57 +0200 Subject: [PATCH 09/10] add extra_css it is added so that the color of the h1 and h2 tags matches sn's color Changes to be committed: modified: salzburger_nachrichten.recipe --- recipes/salzburger_nachrichten.recipe | 3 +++ 1 file changed, 3 insertions(+) diff --git a/recipes/salzburger_nachrichten.recipe b/recipes/salzburger_nachrichten.recipe index 1b2ceb72f7..f5302d41e1 100644 --- a/recipes/salzburger_nachrichten.recipe +++ b/recipes/salzburger_nachrichten.recipe @@ -15,6 +15,9 @@ class SalzburgerNachrichten(BasicNewsRecipe): ignore_duplicate_articles = {'url'} cover_url = 'https://front.e-pages.dk/data/teasers/salzburgernachrichten/308/vector/ts1.jpg' + extra_css = '''.article-title {font-size: 1.5em; color: #0358a9;} + h2 {font-size: 1.2em; color: #0358a9;}''' + keep_only_tags = [ dict(name='h1', attrs={'class': 'article-title'}), dict(name='div', attrs={'class': 'article-author'}), From b2505dc36e6bda2335a855ff6e8604f3bc84dbbe Mon Sep 17 00:00:00 2001 From: "ilker m. sitki" Date: Wed, 4 Sep 2024 13:50:33 +0200 Subject: [PATCH 10/10] add the full name of the author Changes to be committed: modified: salzburger_nachrichten.recipe --- recipes/salzburger_nachrichten.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/salzburger_nachrichten.recipe b/recipes/salzburger_nachrichten.recipe index f5302d41e1..73be932156 100644 --- a/recipes/salzburger_nachrichten.recipe +++ b/recipes/salzburger_nachrichten.recipe @@ -4,7 +4,7 @@ from calibre.web.feeds.news import BasicNewsRecipe class SalzburgerNachrichten(BasicNewsRecipe): title = 'Salzburger Nachrichten' - __author__ = 'ims' + __author__ = 'İlker Melik Sıtkı' oldest_article = 1 max_articles_per_feed = 10 language = 'de_AT'