calibre/recipes/himal_southasian.recipe
unkn0w7n b71e3ef705 Update recipe_specific_options
for feeds based recipes
2024-07-22 15:31:08 +05:30

93 lines
3.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# vim:fileencoding=utf-8
import json
from calibre.web.feeds.news import BasicNewsRecipe
from html5_parser import parse
def get_story(story):
    """Yield HTML fragments for one story element, recursing into children.

    ``story`` is a dict taken from the article JSON. Dispatch is on its
    ``'type'`` value:

    * ``'text'``  -> one fragment: a newline followed by the element's text.
    * ``'image'`` -> one fragment: the markup produced by ``img(story)``.
    * anything else -> the fragments of every child listed under
      ``'story-elements'``, depth first; nothing if there are no children.

    A ``'text'`` or ``'story-elements'`` value that is missing or ``None``
    (JSON ``null``) is treated as empty instead of raising.
    """
    kind = story.get('type', '')
    if kind == 'text':
        # `or ''` keeps the leading newline even when the text is null/absent.
        yield '\n' + (story.get('text') or '')
    elif kind == 'image':
        yield ''.join(img(story))
    else:
        # Container-like elements: flatten their children into this stream.
        for child in story.get('story-elements') or ():
            yield from get_story(child)
def img(img):
    """Yield the HTML pieces for one image element, wrapped in ``<p>...</p>``.

    ``img`` is the image element dict (note: the parameter shadows this
    function's own name inside the body). Two optional keys are read:

    * ``'image-s3-key'`` -- appended to ``https://media.assettype.com/`` to
      form the ``<img>`` source URL.
    * ``'title'`` -- emitted as a ``<div class="cap">`` caption.

    A key that is absent, ``None`` (JSON ``null``) or empty is skipped, so
    the generator never fails on string concatenation and never emits an
    ``<img>`` with a bare base URL or an empty caption.
    """
    yield '<p>'
    key = img.get('image-s3-key')
    if key:
        yield '<img src="{}">'.format('https://media.assettype.com/' + key)
    caption = img.get('title')
    if caption:
        yield '<div class="cap">' + caption + '</div>'
    yield '</p>'
class himal(BasicNewsRecipe):
    """Feed-based recipe for Himal Southasian (https://www.himalmag.com).

    Article links come from the site's feed. Each downloaded page is then
    replaced by a small HTML document rebuilt from the JSON embedded in the
    page (see ``preprocess_raw_html``).
    """

    title = 'Himal Southasian'
    __author__ = 'unkn0wn'
    description = (
        'Himal Southasian is Southasia’s first and only regional magazine of politics and culture.'
        ' For over 30 years, Himal Southasian has challenged nationalist orthodoxies, and covered the region with '
        'imagination, rigour and irreverence, with contributions from some of the most interesting writers in the region.'
    )
    language = 'en_IN'
    no_stylesheets = True
    remove_attributes = ['height', 'width', 'style']
    ignore_duplicate_articles = {'url'}
    masthead_url = 'https://gumlet.assettype.com/himalmag/2024-01/4ecc5615-eceb-4497-87c7-4e013083ba17/logo_.png'
    encoding = 'utf-8'
    resolve_internal_links = True
    oldest_article = 30  # days

    # User-tunable option; its default mirrors `oldest_article` above.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    def __init__(self, *args, **kwargs):
        """Initialise the base recipe, then apply the ``days`` option.

        After the base initialiser runs, ``recipe_specific_options['days']``
        is read; when it is a non-empty string it is converted with
        ``float`` and stored as ``oldest_article``. The ``isinstance`` check
        skips the class-level definition above, whose ``'days'`` entry is a
        dict rather than a value.
        NOTE(review): presumably the base class replaces the option
        definitions with the user's string values -- confirm against calibre.

        Raises:
            ValueError: if the string is not a valid float literal.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        d = self.recipe_specific_options.get('days')
        if d and isinstance(d, str):
            self.oldest_article = float(d)

    extra_css = '''
        .cap, .auth {font-size:small;}
        em, blockquote {color:#404040;}
        .subhead { font-style:italic; color:#202020; }
    '''

    feeds = [
        ('Articles', 'https://www.himalmag.com/feed')
    ]

    def preprocess_raw_html(self, raw, *a):
        """Rebuild the article HTML from the page's embedded story JSON.

        The page is parsed and the text of ``<script id="static-page">`` is
        decoded as JSON; the story lives at ``['qt']['data']['story']``.
        The returned document contains, in order: headline (``<h1>``),
        optional subheadline, author, hero image and hero caption, then a
        ``<div>`` holding every story element of every card rendered
        through ``get_story``.

        Optional fields are used only when present and non-empty, so a JSON
        ``null`` value is skipped rather than failing on concatenation.

        Args:
            raw: the downloaded page markup.
            *a: extra positional arguments supplied by the caller; unused.

        Returns:
            The replacement HTML document as a string.

        Raises:
            IndexError: if the page has no ``static-page`` script element.
            KeyError: if the JSON lacks the expected story, ``headline`` or
                ``cards`` entries.
        """
        root = parse(raw)
        m = root.xpath('//script[@id="static-page"]')
        data = json.loads(m[0].text)['qt']['data']['story']

        title = '<h1>' + data['headline'] + '</h1>'
        subhead = auth = caption = lede = ''
        if data.get('subheadline'):
            subhead = '\n<p class="subhead">' + data['subheadline'] + '</p>'
        if data.get('author-name'):
            auth = '\n<div class="auth">' + data['author-name'] + '</div>'
        if data.get('hero-image-s3-key'):
            lede = '\n<p><img src="{}">'.format('https://media.assettype.com/' + data['hero-image-s3-key'])
        if data.get('hero-image-caption'):
            caption = '<div class="cap">' + data['hero-image-caption'] + '</div>'

        # Fragments of one element are newline-joined; elements themselves
        # are concatenated with no separator, as before.
        body = ''.join(
            '\n'.join(get_story(story))
            for ele in data['cards']
            for story in (ele.get('story-elements') or ())
        )
        return '<html><body>\n' + title + subhead + auth + lede + caption + '<div>' + body + '\n</div></body></html>'