Kovid Goyal 2023-10-15 13:31:34 +05:30
commit 36e08bd767


@@ -1,6 +1,4 @@
from calibre.web.feeds.news import BasicNewsRecipe, classes
from calibre import browser

def absurl(url):
    if url.startswith('/'):
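(Aside, not part of the diff: the hunk cuts absurl off after its first two lines. A minimal sketch of how such a helper typically completes, assuming the site root that parse_index uses further down; the body shown here is an assumption, not the recipe's actual code:)

def absurl(url):
    if url.startswith('/'):
        url = 'https://www.spectator.co.uk' + url  # assumed site root, taken from parse_index below
    return url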
@@ -22,20 +20,24 @@ class spectator(BasicNewsRecipe):
    resolve_internal_links = True
    extra_css = '''
        .author-bio {font-size:small;}
        .writers-link__text, .author-bio__content {font-size:small; color:#404040;}
        #fig-c {text-align:center; font-size:small;}
        blockquote, em {color:#404040;}
        blockquote, em, i {color:#202020;}
        img {display:block; margin:0 auto;}
    '''
    keep_only_tags = [
        classes(
            'entry-header__heading entry-header__thumbnail entry-content__wrapper author-bio'),
            'writers-link entry-header__author entry-header__title entry-header__thumbnail entry-content '
            'author-bio__content '
        )
    ]
    remove_tags = [
        dict(name = ['svg', 'button']),
        classes(
            'entry-header__author entry-header__meta entry-meta insert--most-popular '
            'subscribe-ribbon subscription-banner paywall__card'
            'entry-meta audio-read-block insert--most-popular ad-slot ad-slot--in-content ad-content '
            'subscription-banner '
        )
    ]
@@ -44,15 +46,21 @@ class spectator(BasicNewsRecipe):
            fc['id'] = 'fig-c'
        return soup

    # print_version loads all the articles, but it can sometimes fail due to too many requests
    def print_version(self, url):
        from urllib.parse import quote
        return 'https://webcache.googleusercontent.com/search?q=cache:' + quote(url, safe='')
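    # Aside, not from the recipe: quote(url, safe='') percent-encodes ':' and '/'
    # as well, so (for a hypothetical article URL) the cache lookup would be built as:
    #   >>> from urllib.parse import quote
    #   >>> quote('https://www.spectator.co.uk/article/example', safe='')
    #   'https%3A%2F%2Fwww.spectator.co.uk%2Farticle%2Fexample'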
    def parse_index(self):
        soup = self.index_to_soup('https://www.spectator.co.uk/magazine')
        self.cover_url = soup.find(**classes(
            'magazine-header__container')).img['src'].split('?')[0]
        issue = self.tag_to_string(soup.find(**classes(
            'magazine-header__title'))).strip()
        self.timefmt = ' (' + issue + ') [' + self.tag_to_string(soup.find(**classes(
            'magazine-header__date'))).strip() + ']'
        self.log('Downloading Issue: ', self.timefmt)
        time = soup.find('time')
        self.title = 'The Spectator ' + issue
        self.timefmt = ' [' + self.tag_to_string(time) + ']'
        self.log('Downloading Issue: ', self.title, self.timefmt)
        nav_div = soup.find('ul', **classes('archive-entry__nav-list'))
        section_list = []
@@ -94,17 +102,3 @@ class spectator(BasicNewsRecipe):
                self.log('\t', title, '\n\t', desc, '\n\t\t', url)
                ans.append({'title': title, 'description':desc, 'url': url})
        return ans

    # Spectator changes the content it delivers based on cookies, so the
    # following ensures that we send no cookies
    def get_browser(self, *args, **kwargs):
        return self

    def clone_browser(self, *args, **kwargs):
        return self.get_browser()

    def open_novisit(self, *args, **kwargs):
        br = browser()
        return br.open_novisit(*args, **kwargs)

    open = open_novisit
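(Aside, not part of the commit: with print_version and the browser overrides above removed, the recipe falls back to calibre's stock BasicNewsRecipe.get_browser(), which does keep cookies between requests. When a recipe needs to tweak the browser, the usual pattern is to extend that default rather than replace it entirely as the removed code did; a minimal sketch, with the extra header purely hypothetical:)

from calibre.web.feeds.news import BasicNewsRecipe

class SomeRecipe(BasicNewsRecipe):
    def get_browser(self):
        # Start from the stock mechanize browser calibre configures for recipes
        br = BasicNewsRecipe.get_browser(self)
        # Hypothetical tweak, for illustration only: send an extra request header
        br.addheaders += [('Accept-Language', 'en-GB,en;q=0.9')]
        return br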