From 0841f08d2214625f8ce9f001ca396e652d483337 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sat, 14 Oct 2023 11:05:10 +0530
Subject: [PATCH] Add Newslaundry recipe; update Reuters recipe

---
 recipes/icons/newslaundry.png | Bin 0 -> 286 bytes
 recipes/newslaundry.recipe    |  36 ++++++++++++++++++++++++++++++++++
 recipes/reuters.recipe        |  29 +++++++++++++++------------
 3 files changed, 53 insertions(+), 12 deletions(-)
 create mode 100644 recipes/icons/newslaundry.png
 create mode 100644 recipes/newslaundry.recipe

diff --git a/recipes/icons/newslaundry.png b/recipes/icons/newslaundry.png
new file mode 100644
index 0000000000000000000000000000000000000000..342e059d0d300fea9fd81ebd87d0a2ac48038230
GIT binary patch
literal 286
zcmV+(0pb3MP)<h;3K|Lk000e1NJLTq000mG000mO0{{R3C@l|D0001EP)t-s>n1G!
z|NrU$0O}YZ>JJ&~A}Hz<9rm@h>HrV;#Kr#k`Rg4d^O&0JC@%AnlkaJ4?`3H8qNDPM
ziS(eN_q@IO*VyU+3H|Nu?p$B_(9!aWjQZBs{Nv;M+}!!f%k4@|@qmNN;xy9$004GL
zL_t&-l~vA55`!QVL{XZrDCCJz$w!d=uhNtgUV7AZMpuY?<~PBn9yABVz@}wMmVtKZ
z=`;twu61BAY}FSId!|?38OZTnM)?PyFBllJ9i_4|bHY2ET_%E*psANPgof7JZ9qN{
kdb@U@8N59TrDDAA1(DMP_q!vt4FCWD07*qoM6N<$f}D?q`v3p{

literal 0
HcmV?d00001

diff --git a/recipes/newslaundry.recipe b/recipes/newslaundry.recipe
new file mode 100644
index 0000000000..7b105a59ce
--- /dev/null
+++ b/recipes/newslaundry.recipe
@@ -0,0 +1,36 @@
+from calibre.web.feeds.news import BasicNewsRecipe, classes
+
+class newslaundry(BasicNewsRecipe):
+    """Calibre recipe for the Newslaundry stories feed; aborts 'hindi.newslaundry' URLs."""
+    title = 'Newslaundry'
+    __author__ = 'unkn0wn'
+    description = (
+        'Newslaundry is a reader-supported, independent news media company. In an industry driven by corporate'
+        ' and government interests, we strongly believe in the need for an independent news model, and a free'
+        ' and accountable press.'
+    )
+    language = 'en_IN'
+    masthead_url = 'https://images.assettype.com/newslaundry/2020-01/d91cad07-9650-47e9-8bdc-9a6247354d95/Header_logo_NL__2_New.png'
+    encoding = 'utf-8'
+    no_stylesheets = True
+    remove_javascript = True
+    oldest_article = 7  # days
+    resolve_internal_links = True
+    ignore_duplicate_articles = {'url'}
+    # keep_only_tags = [classes('headline subheadline authorWithTimeStamp story-card')]
+
+    feeds = [
+        ('Articles', 'https://www.newslaundry.com/stories.rss?time-period=last-7-days')
+    ]
+
+    # def preprocess_html(self, soup):
+    #     if h1 := soup.find(**classes('headline')):
+    #         h1.name = 'h1'
+    #     if h3 := soup.find(**classes('subheadline')):
+    #         h3.name = 'h3'
+    #     return soup
+
+    def print_version(self, url):
+        if 'hindi.newslaundry' in url:  # delete this check to keep Hindi articles
+            self.abort_article('Skipping hindi article')
+        return url
diff --git a/recipes/reuters.recipe b/recipes/reuters.recipe
index f9d0e40f3a..92ddb4c0dd 100644
--- a/recipes/reuters.recipe
+++ b/recipes/reuters.recipe
@@ -58,27 +58,29 @@ class Reuters(BasicNewsRecipe):
     __author__ = 'Kovid Goyal'
     language = 'en'
 
+
     keep_only_tags = [
         prefixed_classes('article-body__container__ article-header__container__'),
     ]
     remove_tags = [
         prefixed_classes(
-            'context-widget__tabs___'
+            'context-widget__tabs___ article-header__toolbar__ read-next-mobile__container__ toolbar__container__ button__link__'
             ' ArticleBody-read-time-and-social Slideshow-expand-button- TwoColumnsLayout-footer- RegistrationPrompt__container___'
-            ' SocialEmbed__inner___'
+            ' SocialEmbed__inner___ trust-badge author-bio__social__ with-spinner__spinner__ author-bio__author-image__'
         ),
-        dict(name=['button', 'link']),
+        dict(name=['button', 'link', 'svg']),
     ]
-    remove_attributes = ['style']
-    extra_css = '''
-    img { max-width: 100%; }
-    '''
+    remove_attributes = ['style', 'height', 'width']
 
-    def preprocess_html(self, soup, *a):
-        for noscript in soup.findAll('noscript'):
-            if noscript.findAll('img'):
-                noscript.name = 'div'
-        return soup
+    extra_css = '''
+        img { max-width: 100%; }
+        [class^="article-header__tags__"],
+        [class^="author-bio__author-card__"],
+        [class^="article-header__author-date__"] {
+            font-size:small;
+        }
+        [data-testid="primary-gallery"], [data-testid="primary-image"] { font-size:small; text-align:center; }
+    '''
 
     def parse_index(self):
         base, sections = country_defs[country]
@@ -103,6 +105,14 @@ class Reuters(BasicNewsRecipe):
             self.log('\t', article['title'], article['url'])
 
     def preprocess_html(self, soup):
+        # Rewrites image markup in the parsed page and returns the same soup.
+        # Any <noscript> holding an <img> is renamed to <div>; presumably so the
+        # image inside it is kept in the output -- TODO confirm.
+        for noscript in soup.findAll('noscript'):
+            if noscript.findAll('img'):
+                noscript.name = 'div'
+        # For every <img> carrying a srcset, src is overwritten with the first
+        # whitespace-separated token of that srcset.
         for img in soup.findAll('img', attrs={'srcset':True}):
             img['src'] = img['srcset'].split()[0]
         return soup