mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
93 lines
3.2 KiB
Python
93 lines
3.2 KiB
Python
#!/usr/bin/env python
|
||
# vim:fileencoding=utf-8
|
||
import json
|
||
|
||
from calibre.web.feeds.news import BasicNewsRecipe
|
||
from html5_parser import parse
|
||
|
||
|
||
def get_story(story):
    """Lazily yield HTML fragments for one story element.

    A ``'text'`` element yields its ``'text'`` value prefixed with a newline.
    An ``'image'`` element yields the markup produced by ``img`` joined into a
    single string. Any other element that has a ``'story-elements'`` entry is
    walked recursively; elements matching none of these yield nothing.
    """
    kind = story.get('type', '')

    if kind == 'text':
        yield '\n' + story['text']
        return

    if kind == 'image':
        yield ''.join(img(story))
        return

    # Not a leaf we know how to render: descend only if it has children.
    if 'story-elements' not in story:
        return
    for child in story['story-elements']:
        yield from get_story(child)
|
||
|
||
def img(img):
    """Yield the HTML fragments for an image element, wrapped in ``<p>``.

    The ``<img>`` tag is emitted only when ``'image-s3-key'`` is present and
    the caption ``<div>`` only when ``'title'`` is present; the opening and
    closing ``<p>`` tags are always emitted.
    """
    has_picture = 'image-s3-key' in img
    has_caption = 'title' in img

    yield '<p>'
    if has_picture:
        # The S3 key is appended to the fixed media host to form the URL.
        source_url = 'https://media.assettype.com/' + img['image-s3-key']
        yield '<img src="{}">'.format(source_url)
    if has_caption:
        yield '<div class="cap">' + img['title'] + '</div>'
    yield '</p>'
|
||
|
||
class himal(BasicNewsRecipe):
    # Recipe for Himal Southasian: articles are listed by the site's feed and
    # each article page is rebuilt from the JSON embedded in that page.
    title = 'Himal Southasian'
    __author__ = 'unkn0wn'
    description = (
        'Himal Southasian is Southasia’s first and only regional magazine of politics and culture.'
        ' For over 30 years, Himal Southasian has challenged nationalist orthodoxies, and covered the region with '
        'imagination, rigour and irreverence, with contributions from some of the most interesting writers in the region.'
    )
    language = 'en_IN'
    encoding = 'utf-8'
    masthead_url = 'https://gumlet.assettype.com/himalmag/2024-01/4ecc5615-eceb-4497-87c7-4e013083ba17/logo_.png'

    no_stylesheets = True
    resolve_internal_links = True
    remove_attributes = ['height', 'width', 'style']
    ignore_duplicate_articles = {'url'}
    oldest_article = 30  # days

    # The default shown for 'days' is taken from oldest_article above, so that
    # attribute must be defined before this dict.
    recipe_specific_options = {
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article)
        }
    }

    extra_css = '''
        .cap, .auth {font-size:small;}
        em, blockquote {color:#404040;}
        .subhead { font-style:italic; color:#202020; }
    '''

    feeds = [
        ('Articles', 'https://www.himalmag.com/feed')
    ]

    def __init__(self, *args, **kwargs):
        """Initialise the base recipe, then apply the 'days' option if set.

        When ``recipe_specific_options['days']`` is a non-empty string it is
        converted to ``float`` and stored as ``oldest_article``.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)
        # NOTE(review): presumably calibre swaps in the user-supplied option
        # values (plain strings) before this point; the class-level entry is a
        # dict and is therefore ignored by the isinstance check -- confirm.
        days = self.recipe_specific_options.get('days')
        if days and isinstance(days, str):
            self.oldest_article = float(days)

    def preprocess_raw_html(self, raw, *a):
        """Rebuild the article as minimal HTML from the page's embedded JSON.

        The story data is read from the ``<script id="static-page">`` element
        at ``['qt']['data']['story']``. The returned document contains the
        headline, then the optional subheadline, author, hero image and hero
        caption, followed by the rendered story elements of every card.
        """
        scripts = parse(raw).xpath('//script[@id="static-page"]')
        story = json.loads(scripts[0].text)['qt']['data']['story']

        # Fragments are collected in output order and joined once at the end.
        pieces = ['<html><body>\n', '<h1>' + story['headline'] + '</h1>']

        if 'subheadline' in story:
            pieces.append('\n<p class="subhead">' + story['subheadline'] + '</p>')
        if 'author-name' in story:
            pieces.append('\n<div class="auth">' + story['author-name'] + '</div>')
        if 'hero-image-s3-key' in story:
            hero_url = 'https://media.assettype.com/' + story['hero-image-s3-key']
            pieces.append('\n<p><img src="{}">'.format(hero_url))
        if 'hero-image-caption' in story:
            pieces.append('<div class="cap">' + story['hero-image-caption'] + '</div>')

        pieces.append('<div>')
        for card in story['cards']:
            for element in card.get('story-elements', {}):
                pieces.append('\n'.join(get_story(element)))
        pieces.append('\n</div></body></html>')

        return ''.join(pieces)
|