From 6b69b78eadce57463eab19d8b8aa2079abcd75a2 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 29 Sep 2018 10:32:33 +0530
Subject: [PATCH] Update The New Zealand Herald

---
 recipes/nzherald.recipe | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/recipes/nzherald.recipe b/recipes/nzherald.recipe
index f416536cbe..631089309e 100644
--- a/recipes/nzherald.recipe
+++ b/recipes/nzherald.recipe
@@ -1,5 +1,10 @@
 from calibre.web.feeds.recipes import BasicNewsRecipe
-import re
+
+
+def classes(classes):  # build an attrs matcher for tags carrying any of the space-separated class names
+    wanted = frozenset(classes.split(' '))  # the class names that count as a match
+    return {'attrs': {  # predicate: falsy for a missing/empty class attribute, else the set of shared names
+        'class': lambda value: value and wanted.intersection(value.split())}}
 
 
 class NewZealandHerald(BasicNewsRecipe):
@@ -11,6 +16,15 @@ class NewZealandHerald(BasicNewsRecipe):
     language = 'en_NZ'
     oldest_article = 2.5
 
+    keep_only_tags = [  # tag selectors; presumably read by BasicNewsRecipe to keep only these parts - confirm in calibre docs
+        classes('article-header'),  # any tag whose class attribute includes 'article-header'
+        dict(id='article-content'),  # the tag whose id is 'article-content'
+    ]
+
+    remove_tags = [  # tag selectors; a tag matches when it carries at least one of the class names listed below
+        classes('ad-container pb-f-video-video-player pb-f-article-related-articles social-shares')
+    ]
+
     feeds = [
         ('Business',
          'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000003.xml'),
@@ -36,8 +50,7 @@ class NewZealandHerald(BasicNewsRecipe):
          'http://rss.nzherald.co.nz/rss/xml/nzhrsscid_000000008.xml'),
     ]
 
-    def print_version(self, url):
-        m = re.search(r'objectid=(\d+)', url)
-        if m is None:
-            return url
-        return 'http://www.nzherald.co.nz/news/print.cfm?pnum=1&objectid=' + m.group(1)
+    def preprocess_html(self, soup, *a):  # point each <img> at the first URL listed in its data-srcset attribute
+        for img in soup.findAll('img', attrs={'data-srcset': lambda v: v and v.strip()}):  # skip blank values: split()[0] would raise IndexError
+            img['src'] = img['data-srcset'].split()[0].rstrip(',')  # first candidate URL; drop a trailing list-separator comma
+        return soup  # the same soup object, modified in place