From 6fdacb38fcbec931b1e527f0e4a09fdc513ed2f2 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sun, 15 Oct 2023 12:45:48 +0530
Subject: [PATCH] Update spectator_magazine.recipe

---
 recipes/spectator_magazine.recipe | 44 +++++++++++++------------------
 1 file changed, 19 insertions(+), 25 deletions(-)

diff --git a/recipes/spectator_magazine.recipe b/recipes/spectator_magazine.recipe
index aee63e9b42..2debf2eab3 100644
--- a/recipes/spectator_magazine.recipe
+++ b/recipes/spectator_magazine.recipe
@@ -1,6 +1,4 @@
 from calibre.web.feeds.news import BasicNewsRecipe, classes
-from calibre import browser
-
 
 def absurl(url):
     if url.startswith('/'):
@@ -22,20 +20,24 @@ class spectator(BasicNewsRecipe):
     resolve_internal_links = True
 
     extra_css = '''
-        .author-bio {font-size:small;}
+        .writers-link__text, .author-bio__content {font-size:small; color:#404040;}
         #fig-c {text-align:center; font-size:small;}
-        blockquote, em {color:#404040;}
+        blockquote, em, i {color:#202020;}
+        img {display:block; margin:0 auto;}
     '''
 
     keep_only_tags = [
         classes(
-            'entry-header__heading entry-header__thumbnail entry-content__wrapper author-bio'),
-        ]
+            'writers-link entry-header__author entry-header__title entry-header__thumbnail entry-content '
+            'author-bio__content '
+        )
+    ]
 
     remove_tags = [
+        dict(name = ['svg', 'button']),
         classes(
-            'entry-header__author entry-header__meta entry-meta insert--most-popular '
-            'subscribe-ribbon subscription-banner paywall__card'
+            'entry-meta audio-read-block insert--most-popular ad-slot ad-slot--in-content ad-content '
+            'subscription-banner ' 
         )
     ]
 
@@ -43,6 +45,11 @@ class spectator(BasicNewsRecipe):
         for fc in soup.findAll('figcaption'):
             fc['id'] = 'fig-c'
         return soup
+    
+    # Rewrite each article URL to its Google web cache copy; this loads all articles, but may sometimes fail due to too many requests
+    def print_version(self, url):
+        from urllib.parse import quote
+        return 'https://webcache.googleusercontent.com/search?q=cache:' + quote(url, safe='')
 
     def parse_index(self):
         soup = self.index_to_soup('https://www.spectator.co.uk/magazine')
@@ -50,9 +57,10 @@ class spectator(BasicNewsRecipe):
             'magazine-header__container')).img['src'].split('?')[0]
         issue = self.tag_to_string(soup.find(**classes(
             'magazine-header__title'))).strip()
-        self.timefmt = ' (' + issue + ') [' + self.tag_to_string(soup.find(**classes(
-            'magazine-header__date'))).strip() + ']'
-        self.log('Downloading Issue: ', self.timefmt)
+        time = soup.find('time')
+        self.title = 'The Spectator ' + issue
+        self.timefmt = ' [' + self.tag_to_string(time) + ']'
+        self.log('Downloading Issue: ', self.title, self.timefmt)
         nav_div = soup.find('ul', **classes('archive-entry__nav-list'))
         section_list = []
 
@@ -94,17 +102,3 @@ class spectator(BasicNewsRecipe):
             self.log('\t', title, '\n\t', desc, '\n\t\t', url)
             ans.append({'title': title, 'description':desc, 'url': url})
         return ans
-
-    # Spectator changes the content it delivers based on cookies, so the
-    # following ensures that we send no cookies
-    def get_browser(self, *args, **kwargs):
-        return self
-
-    def clone_browser(self, *args, **kwargs):
-        return self.get_browser()
-
-    def open_novisit(self, *args, **kwargs):
-        br = browser()
-        return br.open_novisit(*args, **kwargs)
-
-    open = open_novisit