calibre/recipes/himal_southasian.recipe
un-pogaz 8d28380515 add/remove blank-line (extra-edit)
ruff 'E302,E303,E304,E305,W391'
2025-01-24 11:14:21 +01:00

96 lines
3.2 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
from html5_parser import parse
from calibre.web.feeds.news import BasicNewsRecipe
def get_story(story):
    """Yield HTML fragments for one story element, descending into children.

    The element's ``'type'`` value (empty string when absent) selects the
    output:

    * ``'text'``  -- yields the element's ``'text'`` value prefixed with a
      newline.
    * ``'image'`` -- yields the fragments produced by ``img`` joined into a
      single string.
    * anything else -- when the element has a ``'story-elements'`` entry,
      yields whatever each child yields, in order; otherwise yields nothing.
    """
    kind = story.get('type', '')
    if kind == 'text':
        yield '\n' + story['text']
        return
    if kind == 'image':
        yield ''.join(img(story))
        return
    if 'story-elements' not in story:
        return
    # Composite element: flatten the nested elements recursively.
    for child in story['story-elements']:
        yield from get_story(child)
def img(img):
    """Yield the HTML fragments for one image story element.

    The output is always wrapped in ``<p>`` ... ``</p>``. Between them:

    * an ``<img>`` tag whose source is ``https://media.assettype.com/`` plus
      the element's ``'image-s3-key'``, when that value is present and
      non-empty;
    * a ``<div class="cap">`` holding the element's ``'title'``, when that
      value is present and non-empty.

    Values that are missing, ``None`` (JSON ``null``) or empty are skipped
    rather than concatenated, so a null caption or key cannot raise
    ``TypeError``.

    :param img: mapping describing the image element. The parameter name
        shadows this function inside the body; it is kept as-is so existing
        callers are unaffected.
    """
    yield '<p>'
    key = img.get('image-s3-key')
    if key:
        yield '<img src="{}">'.format('https://media.assettype.com/' + key)
    caption = img.get('title')
    if caption:
        yield '<div class="cap">' + caption + '</div>'
    yield '</p>'
class himal(BasicNewsRecipe):
    """Recipe for Himal Southasian.

    Articles are listed by the feed in ``feeds``. Each article page is not
    used as rendered HTML: ``preprocess_raw_html`` reads the JSON embedded in
    the page's ``<script id="static-page">`` element and rebuilds a minimal
    HTML document from it.
    """

    title = 'Himal Southasian'
    __author__ = 'unkn0wn'
    description = ('Himal Southasian is Southasias first and only regional magazine of politics and culture.'
                   ' For over 30 years, Himal Southasian has challenged nationalist orthodoxies, and covered the region with '
                   'imagination, rigour and irreverence, with contributions from some of the most interesting writers in the region.')
    language = 'en_IN'
    no_stylesheets = True
    remove_attributes = ['height', 'width', 'style']
    ignore_duplicate_articles = {'url'}
    masthead_url = 'https://gumlet.assettype.com/himalmag/2024-01/4ecc5615-eceb-4497-87c7-4e013083ba17/logo_.png'
    encoding = 'utf-8'
    resolve_internal_links = True
    oldest_article = 30  # days

    # Declares a user-facing 'days' option whose default mirrors
    # ``oldest_article`` above.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    def __init__(self, *args, **kwargs):
        """Initialise the recipe and apply a string-valued 'days' option.

        The class-level ``'days'`` entry is a dict describing the option, so
        only a non-empty string value is converted with ``float`` and stored
        in ``oldest_article``.
        NOTE(review): presumably the framework replaces the entry with the
        user's value before or during base-class initialisation -- confirm.

        :raises ValueError: if the string value is not a valid float.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        d = self.recipe_specific_options.get('days')
        if d and isinstance(d, str):
            self.oldest_article = float(d)

    extra_css = '''
        .cap, .auth {font-size:small;}
        em, blockquote {color:#404040;}
        .subhead { font-style:italic; color:#202020; }
    '''

    feeds = [
        ('Articles', 'https://www.himalmag.com/feed')
    ]

    def preprocess_raw_html(self, raw, *a):
        """Rebuild an article as simple HTML from the page's embedded JSON.

        The story object is read from ``['qt']['data']['story']`` of the JSON
        inside ``<script id="static-page">``. The returned document contains,
        in order: the headline, then the sub-headline, author name, hero
        image and hero-image caption (each only when its value is present
        and non-empty), and finally a ``<div>`` holding the text produced by
        ``get_story`` for every story element of every card.

        Optional fields that are missing, ``None`` (JSON ``null``) or empty
        are skipped, so a null value cannot raise ``TypeError`` during string
        concatenation.

        :param raw: raw HTML of the article page.
        :param a: extra positional arguments; ignored.
        :returns: the rebuilt HTML document as a string.
        :raises IndexError: if the page has no ``static-page`` script element.
        :raises KeyError: if the JSON lacks the story, its ``'headline'`` or
            its ``'cards'``.
        """
        root = parse(raw)
        m = root.xpath('//script[@id="static-page"]')
        data = json.loads(m[0].text)['qt']['data']['story']

        parts = ['<html><body>\n', '<h1>' + data['headline'] + '</h1>']

        subheadline = data.get('subheadline')
        if subheadline:
            parts.append('\n<p class="subhead">' + subheadline + '</p>')

        author = data.get('author-name')
        if author:
            parts.append('\n<div class="auth">' + author + '</div>')

        hero_key = data.get('hero-image-s3-key')
        if hero_key:
            parts.append('\n<p><img src="{}">'.format('https://media.assettype.com/' + hero_key))

        # The caption is emitted independently of the hero image, exactly as
        # the individual fields dictate.
        hero_caption = data.get('hero-image-caption')
        if hero_caption:
            parts.append('<div class="cap">' + hero_caption + '</div>')

        # Each story element's fragments are newline-joined; the per-element
        # results are then concatenated without any separator.
        body = ''.join(
            '\n'.join(get_story(story))
            for ele in data['cards']
            for story in (ele.get('story-elements') or ())
        )

        parts.extend(('<div>', body, '\n</div></body></html>'))
        return ''.join(parts)