Kovid Goyal 2023-10-15 13:31:34 +05:30
commit 36e08bd767


@@ -1,6 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe, classes
from calibre import browser

def absurl(url):
    if url.startswith('/'):
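(Aside, not part of the diff: the hunk cuts absurl off after its first two lines. A minimal sketch of how such a helper typically completes, assuming the site root that parse_index uses further down; the body shown here is an assumption, not the recipe's actual code:)

def absurl(url):
    if url.startswith('/'):
        url = 'https://www.spectator.co.uk' + url  # assumed site root, taken from parse_index below
    return url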
@@ -22,20 +20,24 @@ class spectator(BasicNewsRecipe):
    resolve_internal_links = True
    extra_css = '''
        .author-bio {font-size:small;}
        .writers-link__text, .author-bio__content {font-size:small; color:#404040;}
        #fig-c {text-align:center; font-size:small;}
        blockquote, em {color:#404040;}
        blockquote, em, i {color:#202020;}
        img {display:block; margin:0 auto;}
    '''
    keep_only_tags = [
        classes(
            'entry-header__heading entry-header__thumbnail entry-content__wrapper author-bio'),
            'writers-link entry-header__author entry-header__title entry-header__thumbnail entry-content '
            'author-bio__content '
        )
    ]
    remove_tags = [
        dict(name = ['svg', 'button']),
        classes(
            'entry-header__author entry-header__meta entry-meta insert--most-popular '
            'subscribe-ribbon subscription-banner paywall__card'
            'entry-meta audio-read-block insert--most-popular ad-slot ad-slot--in-content ad-content '
            'subscription-banner '
        )
    ]
@@ -44,15 +46,21 @@ class spectator(BasicNewsRecipe):
            fc['id'] = 'fig-c'
        return soup

    # print_version loads all the articles, but it can sometimes fail due to too many requests
    def print_version(self, url):
        from urllib.parse import quote
        return 'https://webcache.googleusercontent.com/search?q=cache:' + quote(url, safe='')
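    # Aside, not from the recipe: quote(url, safe='') percent-encodes ':' and '/'
    # as well, so (for a hypothetical article URL) the cache lookup would be built as:
    #   >>> from urllib.parse import quote
    #   >>> quote('https://www.spectator.co.uk/article/example', safe='')
    #   'https%3A%2F%2Fwww.spectator.co.uk%2Farticle%2Fexample'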
    def parse_index(self):
        soup = self.index_to_soup('https://www.spectator.co.uk/magazine')
        self.cover_url = soup.find(**classes(
            'magazine-header__container')).img['src'].split('?')[0]
        issue = self.tag_to_string(soup.find(**classes(
            'magazine-header__title'))).strip()
        self.timefmt = ' (' + issue + ') [' + self.tag_to_string(soup.find(**classes(
            'magazine-header__date'))).strip() + ']'
        self.log('Downloading Issue: ', self.timefmt)
        time = soup.find('time')
        self.title = 'The Spectator ' + issue
        self.timefmt = ' [' + self.tag_to_string(time) + ']'
        self.log('Downloading Issue: ', self.title, self.timefmt)
        nav_div = soup.find('ul', **classes('archive-entry__nav-list'))
        section_list = []
@@ -94,17 +102,3 @@ class spectator(BasicNewsRecipe):
                self.log('\t', title, '\n\t', desc, '\n\t\t', url)
                ans.append({'title': title, 'description':desc, 'url': url})
        return ans

    # Spectator changes the content it delivers based on cookies, so the
    # following ensures that we send no cookies
    def get_browser(self, *args, **kwargs):
        return self

    def clone_browser(self, *args, **kwargs):
        return self.get_browser()

    def open_novisit(self, *args, **kwargs):
        br = browser()
        return br.open_novisit(*args, **kwargs)

    open = open_novisit
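(Aside, not part of the commit: with print_version and the browser overrides above removed, the recipe falls back to calibre's stock BasicNewsRecipe.get_browser(), which does keep cookies between requests. When a recipe needs to tweak the browser, the usual pattern is to extend that default rather than replace it entirely as the removed code did; a minimal sketch, with the extra header purely hypothetical:)

from calibre.web.feeds.news import BasicNewsRecipe

class SomeRecipe(BasicNewsRecipe):
    def get_browser(self):
        # Start from the stock mechanize browser calibre configures for recipes
        br = BasicNewsRecipe.get_browser(self)
        # Hypothetical tweak, for illustration only: send an extra request header
        br.addheaders += [('Accept-Language', 'en-GB,en;q=0.9')]
        return br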