#!/usr/bin/env python
# vim:fileencoding=utf-8
import json

from html5_parser import parse

from calibre.web.feeds.news import BasicNewsRecipe

# NOTE(review): the HTML tags inside the string literals of this recipe were
# restored by hand. The class names (cap, auth, subhead) are taken from
# ``extra_css`` below; the element names themselves (h1/p/div) should be
# confirmed against the upstream recipe.


def get_story(story):
    '''
    Yield HTML fragments for one story element of the page's JSON payload.

    A ``text`` element yields its ``text`` value prefixed with a newline, an
    ``image`` element yields the markup produced by :func:`img`, and any
    element carrying nested ``story-elements`` is walked recursively.
    Elements matching none of these yield nothing.
    '''
    str_type = story.get('type', '')
    if str_type == 'text':
        yield '\n' + story['text']
    elif str_type == 'image':
        yield ''.join(img(story))
    elif 'story-elements' in story:
        for x in story['story-elements']:
            yield from get_story(x)


def img(img):
    '''
    Yield the HTML pieces for an image story element.

    The image tag is emitted only when ``image-s3-key`` is present and the
    caption only when ``title`` is present; the wrapping paragraph is always
    emitted.
    '''
    yield '<p>'
    if 'image-s3-key' in img:
        # The placeholder is required: without it the URL is silently dropped.
        yield '<img src="{}">'.format('https://media.assettype.com/' + img['image-s3-key'])
    if 'title' in img:
        yield '<div class="cap">' + img['title'] + '</div>'
    yield '</p>'


class himal(BasicNewsRecipe):
    '''Recipe that rebuilds Himal Southasian articles from the JSON embedded in each page.'''

    title = 'Himal Southasian'
    __author__ = 'unkn0wn'
    description = ('Himal Southasian is Southasia’s first and only regional magazine of politics and culture.'
                   ' For over 30 years, Himal Southasian has challenged nationalist orthodoxies, and covered the region with '
                   'imagination, rigour and irreverence, with contributions from some of the most interesting writers in the region.')
    language = 'en_IN'
    no_stylesheets = True
    remove_attributes = ['height', 'width', 'style']
    ignore_duplicate_articles = {'url'}
    masthead_url = 'https://gumlet.assettype.com/himalmag/2024-01/4ecc5615-eceb-4497-87c7-4e013083ba17/logo_.png'
    encoding = 'utf-8'
    resolve_internal_links = True
    oldest_article = 30  # days

    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    def __init__(self, *args, **kwargs):
        '''Initialise the recipe and apply the ``days`` option when it is given as a string.'''
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        d = self.recipe_specific_options.get('days')
        # Only a string value is converted; any other value leaves the
        # class-level ``oldest_article`` untouched.
        if d and isinstance(d, str):
            self.oldest_article = float(d)

    extra_css = '''
        .cap, .auth {font-size:small;}
        em, blockquote {color:#404040;}
        .subhead { font-style:italic; color:#202020; }
    '''

    feeds = [
        ('Articles', 'https://www.himalmag.com/feed')
    ]

    def preprocess_raw_html(self, raw, *a):
        '''
        Replace the downloaded page with HTML built from its embedded JSON.

        The story is read from the ``<script id="static-page">`` element
        (``qt`` -> ``data`` -> ``story``). When that element is absent the
        page is returned unchanged instead of failing with an IndexError.
        '''
        root = parse(raw)
        m = root.xpath('//script[@id="static-page"]')
        if not m:
            # No embedded payload to rebuild from; keep the page as downloaded.
            return raw
        data = json.loads(m[0].text)['qt']['data']['story']

        title = '<h1>' + data['headline'] + '</h1>'
        subhead = auth = caption = lede = ''
        if 'subheadline' in data:
            subhead = '\n<p class="subhead">' + data['subheadline'] + '</p>'
        if 'author-name' in data:
            auth = '\n<div class="auth">' + data['author-name'] + '</div>'
        if 'hero-image-s3-key' in data:
            lede = '\n<p><img src="{}"></p>'.format(
                'https://media.assettype.com/' + data['hero-image-s3-key'])
        if 'hero-image-caption' in data:
            caption = '<div class="cap">' + data['hero-image-caption'] + '</div>'

        # Fragments of a single story element are newline-separated; the
        # results for successive elements are concatenated directly.
        body = ''.join(
            '\n'.join(get_story(story))
            for ele in data['cards']
            for story in ele.get('story-elements', {})
        )

        return ('<html><body>\n' + title + subhead + auth + lede + caption
                + '<div>' + body + '\n</div></body></html>')