From 8273a68f066974313667224cbee7d9e7662e5eab Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sun, 11 May 2025 09:58:09 +0530
Subject: [PATCH] Update nzherald.recipe

---
 recipes/nzherald.recipe | 80 ++++++++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 33 deletions(-)

diff --git a/recipes/nzherald.recipe b/recipes/nzherald.recipe
index 631089309e..5ef9f53a4f 100644
--- a/recipes/nzherald.recipe
+++ b/recipes/nzherald.recipe
@@ -1,56 +1,70 @@
+#!/usr/bin/env  python
 from calibre.web.feeds.recipes import BasicNewsRecipe
 
 
 def classes(classes):
     q = frozenset(classes.split(' '))
-    return dict(attrs={
-        'class': lambda x: x and frozenset(x.split()).intersection(q)})
+    return dict(attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
 
 
 class NewZealandHerald(BasicNewsRecipe):
-
     title = 'New Zealand Herald'
-    __author__ = 'Kovid Goyal'
+    __author__ = 'unkn0wn'
     description = 'Daily news'
     timefmt = ' [%d %b, %Y]'
     language = 'en_NZ'
-    oldest_article = 2.5
+    oldest_article = 1
+    remove_attributes = ['style', 'height', 'width']
+    use_embedded_content = False
+    encoding = 'utf-8'
+    ignore_duplicate_articles = {'url'}
+    no_stylesheets = True
+    resolve_internal_links = True
+    remove_empty_feeds = True
+
+    def get_cover_url(self):
+        soup = self.index_to_soup('https://www.frontpages.com/the-new-zealand-herald/')
+        img = soup.find('img', attrs={'id': 'giornale-img'})
+        if img is None or not img.get('src'):
+            return None  # front-page image not found: build without a cover
+        return 'https://www.frontpages.com' + img['src']
+
+    extra_css = '.article-media__caption {font-size: small;}'
 
     keep_only_tags = [
-        classes('article-header'),
-        dict(id='article-content'),
+        dict(
+            attrs={
+                'data-test-ui': [
+                    'article__heading',
+                    'author--text--body',
+                    'article-top-body',
+                    'article-bottom-body',
+                ]
+            }
+        ),
     ]
 
-    remove_tags = [
-        classes('ad-container pb-f-video-video-player pb-f-article-related-articles social-shares')
-    ]
+    remove_tags = [classes('article__ad-wrapper article__action-bar')]
 
     feeds = [
-        ('Business',
-         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000003.xml'),
-        ('World',
-         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000002.xml'),
-        ('National',
-         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000001.xml'),
-        ('Entertainment',
-         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_001501119.xml'),
-        ('Travel',
-         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000007.xml'),
-        ('Opinion',
-         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000466.xml'),
-        ('Life & Style',
-         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000006.xml'),
-        ('Technology',
-         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000005.xml'),
-        ('Sport',
-         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000004.xml'),
-        ('Motoring',
-         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000009.xml'),
-        ('Property',
-         'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000008.xml'),
+        ('Business', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000003.xml'),
+        ('World', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000002.xml'),
+        ('National', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000001.xml'),
+        ('Entertainment', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_001501119.xml'),
+        ('Travel', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000007.xml'),
+        ('Opinion', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000466.xml'),
+        ('Life & Style', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000006.xml'),
+        ('Technology', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000005.xml'),
+        ('Sport', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000004.xml'),
+        ('Motoring', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000009.xml'),
+        ('Property', 'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000008.xml'),
     ]
 
     def preprocess_html(self, soup, *a):
         for img in soup.findAll('img', attrs={'data-srcset': True}):
-            img['src'] = img['data-srcset'].split()[0]
+            # Prefer the 768w candidate; otherwise fall back to the last entry.
+            srcset = [x for x in img['data-srcset'].split(',') if x.strip()]
+            picks = [x for x in srcset if '768w' in x] or srcset[-1:]
+            if picks:
+                img['src'] = picks[0].split()[0]
         return soup