calibre/recipes/substack.recipe
unkn0w7n de7f80653c ...
2025-01-05 14:46:25 +05:30

127 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# vim:fileencoding=utf-8
#
# Title: Substack
# License: GNU General Public License v3 https://www.gnu.org/licenses/gpl-3.0.html
# Copyright: Nathan Cook (nathan.cook@gmail.com)
##
# Written: 2020-12-18
# Updated: 2024-11-04
##
# Module-level metadata for the recipe file.
# NOTE(review): __date__ and __version__ still carry the 2020 values although the
# header comment says "Updated: 2024-11-04", and the class-level __author__ below
# lists an additional author -- confirm whether these should be brought in sync.
__license__ = 'GNU General Public License v3 https://www.gnu.org/licenses/gpl-3.0.html'
__copyright__ = 'Nathan Cook 2020-12-19'
__version__ = 'v0.1.1'
__date__ = '2020-12-19'
__author__ = 'topynate'
import json
import re
from mechanize import Request
from calibre.web.feeds.news import BasicNewsRecipe, classes
class Substack(BasicNewsRecipe):
    """News recipe that downloads posts from a configurable list of Substack publications.

    The feed list is built from the space-separated @handles given in the
    'auths' recipe-specific option (one ``https://<handle>.substack.com/feed``
    URL per handle). A login is optional; when a username and password are
    supplied the browser logs in to substack.com before fetching.
    """

    title = 'Substack'
    __author__ = 'topynate, unkn0wn'
    description = 'Use advanced menu if you want to add your own substack handles.'
    oldest_article = 7
    language = 'en'
    max_articles_per_feed = 100
    needs_subscription = 'optional'
    use_embedded_content = False
    masthead_url = 'https://substack.com/img/substack_wordmark.png'
    cover_url = 'https://substack.com/img/substack.png'
    extra_css = '.captioned-image-container, .image-container, .image-caption {font-size: small;}'
    remove_empty_feeds = True
    remove_attributes = ['style', 'height', 'width']
    no_stylesheets = True

    # Keep only the post title, subtitle and body containers.
    keep_only_tags = [
        classes('post-title post-subtitle subtitle available-content'),
    ]

    # Drop inline vector graphics / <source> elements and subscription call-to-action widgets.
    remove_tags = [
        dict(name=['svg', 'source']),
        classes('subscribe-widget button-wrapper'),
    ]

    # User-tunable options. Each entry has a 'short' and 'long' description and a
    # 'default' value; the methods below read the chosen values back as strings.
    recipe_specific_options = {
        'auths': {
            'short': 'enter the @handles you subscribe to:\nseperated by a space',
            'long': '@julianmacfarlane @simplicius76 .... ....',
            'default': '@julianmacfarlane @simplicius76 @caitlinjohnstone @michaelmoore @seymourhersh @geopolitiq',
        },
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
        'res': {
            'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
            'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
            'default': '600',
        },
        'rev': {
            'short': 'Reverse the order of articles in each feed?',
            'long': 'enter yes',
            'default': 'no',
        },
    }

    def __init__(self, *args, **kwargs):
        """Initialise the base recipe, then apply the 'days' and 'rev' options.

        All positional and keyword arguments are forwarded unchanged to
        ``BasicNewsRecipe.__init__``.

        Raises:
            ValueError: if the 'days' option is a string that is not a number.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)

        days = self.recipe_specific_options.get('days')
        if days and isinstance(days, str):
            try:
                self.oldest_article = float(days)
            except ValueError as err:
                # Same exception type as a bare float() failure, but the message
                # tells the user which option to correct.
                raise ValueError(
                    "Invalid 'days' option {!r}: expected a number such as 0.5 or 7".format(days)
                ) from err

        reverse = self.recipe_specific_options.get('rev')
        if reverse and isinstance(reverse, str) and reverse.lower().strip() == 'yes':
            self.reverse_article_order = True

    # Every Substack publication has an RSS feed at https://{name}.substack.com/feed.
    # The same URL provides either all posts, or all free posts + previews of paid posts,
    # depending on whether you're logged in.
    # feeds = [
    #     ('Novum Lumen', 'https://novumlumen.substack.com/feed'), # gratuitously self-promotional example
    # ]

    def get_browser(self):
        """Return the recipe's browser, logged in to Substack when credentials are set.

        When both ``self.username`` and ``self.password`` are set, the login
        page is opened first and the credentials are then POSTed as JSON to the
        email-login API endpoint.

        Raises:
            ValueError: if the login request does not return HTTP status 200.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username is None or self.password is None:
            return br

        login_page = 'https://substack.com/account/login?redirect=%2F&email=&with_password='
        # Open the login page before posting credentials.
        br.open(login_page)
        payload = json.dumps({
            'email': self.username,
            'password': self.password,
            'captcha_response': None,
        })
        login_request = Request(
            url='https://substack.com/api/v1/email-login',
            headers={
                'Accept': '*/*',
                'Content-Type': 'application/json',
                'Origin': 'https://substack.com',
                'Referer': login_page,
            },
            data=payload,
            method='POST',
        )
        response = br.open(login_request)
        if response.getcode() != 200:
            raise ValueError('Login failed, check username and password')
        return br

    def get_feeds(self):
        """Return the feed URLs for the handles listed in the 'auths' option.

        Handles may be written with or without a leading '@' and are separated
        by whitespace; commas are tolerated as separators as well so that
        input such as '@a, @b' does not end up inside the host name.

        Returns:
            A list of ``https://<handle>.substack.com/feed`` strings; empty
            when the option is unset or not a string.
        """
        handles = self.recipe_specific_options.get('auths')
        if not (handles and isinstance(handles, str)):
            return []
        names = handles.replace('@', ' ').replace(',', ' ').split()
        return ['https://' + name + '.substack.com/feed' for name in names]

    def preprocess_html(self, soup):
        """Rewrite the ``w_<digits>`` part of every image URL to the chosen width.

        The width comes from the 'res' option and defaults to 600. A value
        that is not made up purely of ASCII digits is ignored, because it is
        spliced into both the regex replacement string and the final URL.

        Returns:
            The same ``soup`` object, modified in place.
        """
        width = '600'
        requested = self.recipe_specific_options.get('res')
        if requested and isinstance(requested, str):
            requested = requested.strip()
            if re.fullmatch(r'[0-9]+', requested):
                width = requested

        replacement = 'w_' + width
        for img in soup.findAll('img', attrs={'src': True}):
            img['src'] = re.sub(r'w_\d+', replacement, img['src'])
        return soup