#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
from html5_parser import parse
from calibre.web.feeds.news import BasicNewsRecipe
def get_story(story):
    """Yield the markup fragments for one story element, recursing into children.

    ``story`` is a mapping. Its optional ``'type'`` key selects the handling:

    * ``'text'``  -> one fragment: a newline followed by ``story['text']``.
    * ``'image'`` -> one fragment: the joined output of ``img(story)``.
    * anything else -> the fragments of every entry under
      ``'story-elements'`` (depth-first); nothing when that key is absent.
    """
    kind = story.get('type', '')

    if kind == 'text':
        yield '\n' + story['text']
        return

    if kind == 'image':
        yield ''.join(img(story))
        return

    # Container element: flatten whatever nested elements it carries.
    for child in story.get('story-elements', ()):
        yield from get_story(child)
def img(img):
    """Yield the HTML fragments for one image story element.

    ``img`` is a mapping describing the image. Both keys are optional:

    * ``'image-s3-key'`` -- path of the image, appended to the
      ``https://media.assettype.com/`` base to form the ``src`` URL.
    * ``'title'`` -- caption text, emitted in a ``cap``-class block.

    The fragments are meant to be joined by the caller; together they form
    a single wrapper element that is emitted even when both keys are absent.
    """
    # NOTE(review): a <div> wrapper is used so the caption <div> nests
    # validly; confirm the tag choice against the rendered output.
    yield '<div>'
    if 'image-s3-key' in img:
        # The placeholder is required: without it .format() drops the URL.
        yield '<img src="{}">'.format('https://media.assettype.com/' + img['image-s3-key'])
    if 'title' in img:
        yield '<div class="cap">' + img['title'] + '</div>'
    yield '</div>'
class himal(BasicNewsRecipe):
    """Calibre recipe for the Himal Southasian magazine.

    Articles are listed from the site's feed. Each article page carries its
    story as JSON inside a ``<script id="static-page">`` element, which
    ``preprocess_raw_html`` converts into a small HTML document.
    """

    title = 'Himal Southasian'
    __author__ = 'unkn0wn'
    description = ('Himal Southasian is Southasia’s first and only regional magazine of politics and culture.'
    ' For over 30 years, Himal Southasian has challenged nationalist orthodoxies, and covered the region with '
    'imagination, rigour and irreverence, with contributions from some of the most interesting writers in the region.')
    language = 'en_IN'
    no_stylesheets = True
    remove_attributes = ['height', 'width', 'style']
    ignore_duplicate_articles = {'url'}
    masthead_url = 'https://gumlet.assettype.com/himalmag/2024-01/4ecc5615-eceb-4497-87c7-4e013083ba17/logo_.png'
    encoding = 'utf-8'
    resolve_internal_links = True
    oldest_article = 30  # days

    # User-tunable option; its default mirrors oldest_article above.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    def __init__(self, *args, **kwargs):
        """Initialise the recipe and apply a 'days' override when one is set.

        Only a non-empty string value for 'days' is honoured. The class-level
        entry is a dict, so it leaves ``oldest_article`` at its default.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        d = self.recipe_specific_options.get('days')
        if d and isinstance(d, str):
            self.oldest_article = float(d)

    # The selectors below match the classes emitted by preprocess_raw_html.
    extra_css = '''
        .cap, .auth {font-size:small;}
        em, blockquote {color:#404040;}
        .subhead { font-style:italic; color:#202020; }
    '''

    feeds = [
        ('Articles', 'https://www.himalmag.com/feed')
    ]

    def preprocess_raw_html(self, raw, *a):
        """Rebuild an article page from the story JSON embedded in it.

        The story is read from the first ``<script id="static-page">``
        element, under ``['qt']['data']['story']``. ``'headline'`` and
        ``'cards'`` are required; sub-headline, author, hero image and hero
        caption are included only when their keys are present.

        Returns the assembled HTML document as a string. Raises IndexError
        when the page has no ``static-page`` script, and KeyError when the
        JSON lacks a required key.
        """
        root = parse(raw)
        m = root.xpath('//script[@id="static-page"]')
        data = json.loads(m[0].text)['qt']['data']['story']

        # NOTE(review): tag names are reconstructed around the classes that
        # extra_css styles (.subhead, .auth, .cap) -- confirm the rendering.
        title = '<h1>' + data['headline'] + '</h1>'
        subhead = auth = caption = lede = ''
        if 'subheadline' in data:
            subhead = '\n<p class="subhead">' + data['subheadline'] + '</p>'
        if 'author-name' in data:
            auth = '\n<p class="auth">' + data['author-name'] + '</p>'
        if 'hero-image-s3-key' in data:
            # The placeholder is required: without it .format() drops the URL.
            lede = '\n<img src="{}">'.format('https://media.assettype.com/' + data['hero-image-s3-key'])
        if 'hero-image-caption' in data:
            caption = '<div class="cap">' + data['hero-image-caption'] + '</div>'

        # Fragments of one story element are newline-separated; consecutive
        # story elements are concatenated with no separator.
        body = ''.join(
            '\n'.join(get_story(story))
            for ele in data['cards']
            for story in ele.get('story-elements', ())
        )
        return (
            '<html><body>\n' + title + subhead + auth + lede + caption
            + '<div>' + body + '\n</div></body></html>'
        )