mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
127 lines
4.7 KiB
Python
127 lines
4.7 KiB
Python
#!/usr/bin/env python
# vim:fileencoding=utf-8
#
# Title: Substack
# License: GNU General Public License v3 – https://www.gnu.org/licenses/gpl-3.0.html
# Copyright: Nathan Cook (nathan.cook@gmail.com)
##
# Written: 2020-12-18
# Updated: 2024-11-04
##

# Module metadata. The values are reproduced exactly as published; only the
# stray "|" / "||" lines that were interleaved between the statements (and
# made them unparseable) have been dropped.
__license__ = 'GNU General Public License v3 – https://www.gnu.org/licenses/gpl-3.0.html'
__copyright__ = 'Nathan Cook – 2020-12-19'
__version__ = 'v0.1.1'
__date__ = '2020-12-19'
__author__ = 'topynate'
|
||
|
||
import json
|
||
import re
|
||
|
||
from mechanize import Request
|
||
|
||
from calibre.web.feeds.news import BasicNewsRecipe, classes
|
||
|
||
|
||
class Substack(BasicNewsRecipe):
    """Recipe that downloads recent posts from a set of Substack publications.

    The publications are taken from the ``auths`` recipe-specific option (a
    list of Substack handles); each handle is turned into that publication's
    ``/feed`` URL.  Supplying a username and password is optional; when both
    are given, ``get_browser`` logs in before anything is fetched.
    """

    title = 'Substack'
    __author__ = 'topynate, unkn0wn'
    description = 'Use advanced menu if you want to add your own substack handles.'
    oldest_article = 7
    language = 'en'
    max_articles_per_feed = 100
    needs_subscription = 'optional'
    use_embedded_content = False
    masthead_url = 'https://substack.com/img/substack_wordmark.png'
    cover_url = 'https://substack.com/img/substack.png'
    extra_css = '.captioned-image-container, .image-container, .image-caption {font-size: small;}'
    remove_empty_feeds = True
    remove_attributes = ['style', 'height', 'width']
    no_stylesheets = True

    # Only the title, subtitle and article body are kept from each page.
    keep_only_tags = [
        classes('post-title post-subtitle subtitle available-content')
    ]

    # Vector graphics, <source> alternatives and subscribe call-to-actions
    # are stripped from the kept content.
    remove_tags = [
        dict(name=['svg', 'source']),
        classes('subscribe-widget button-wrapper')
    ]

    # Options offered to the user.  The keys ('auths', 'days', 'res', 'rev')
    # are the names looked up again by the methods below.
    recipe_specific_options = {
        'auths': {
            'short': 'enter the @handles you subscribe to:\nseparated by a space',
            'long': '@julianmacfarlane @simplicius76 .... ....',
            'default': '@julianmacfarlane @simplicius76 @caitlinjohnstone @michaelmoore @seymourhersh @geopolitiq',
        },
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
        'res': {
            'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
            'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
            'default': '600',
        },
        'rev': {
            'short': 'Reverse the order of articles in each feed?',
            'long': 'enter yes',
            'default': 'no',
        },
    }

    def __init__(self, *args, **kwargs):
        """Initialise the recipe and apply the ``days`` and ``rev`` options.

        Raises:
            ValueError: if the ``days`` option is a string that ``float()``
                cannot parse.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)

        # Only string values are acted on, so the class-level option
        # definitions (dicts) are ignored here.
        # NOTE(review): presumably the base initialiser replaces
        # recipe_specific_options with the user's string values - confirm.
        days = self.recipe_specific_options.get('days')
        if days and isinstance(days, str):
            self.oldest_article = float(days)

        reverse = self.recipe_specific_options.get('rev')
        if reverse and isinstance(reverse, str):
            if reverse.lower().strip() == 'yes':
                self.reverse_article_order = True

    # Every Substack publication has an RSS feed at https://{name}.substack.com/feed.
    # The same URL provides either all posts, or all free posts + previews of paid posts,
    # depending on whether you're logged in.
    # feeds = [
    #     ('Novum Lumen', 'https://novumlumen.substack.com/feed'), # gratuitously self-promotional example
    # ]

    def get_browser(self):
        """Return the browser to fetch with, logged in to Substack if possible.

        When both ``self.username`` and ``self.password`` are set, the login
        page is opened first and the credentials are then POSTed as JSON to
        the e-mail login endpoint.

        Raises:
            ValueError: if the login request answers with a status other
                than 200.
        """
        br = BasicNewsRecipe.get_browser(self)
        if self.username is None or self.password is None:
            return br

        login_page = 'https://substack.com/account/login?redirect=%2F&email=&with_password='
        br.open(login_page)
        payload = json.dumps({'email': self.username, 'password': self.password, 'captcha_response': None})
        req = Request(
            url='https://substack.com/api/v1/email-login',
            headers={
                'Accept': '*/*',
                'Content-Type': 'application/json',
                'Origin': 'https://substack.com',
                'Referer': login_page,
            },
            data=payload,
            method='POST')
        res = br.open(req)
        if res.getcode() != 200:
            raise ValueError('Login failed, check username and password')
        return br

    def get_feeds(self):
        """Build the list of feed URLs from the ``auths`` option.

        '@' signs, commas and whitespace are all treated as separators, and a
        handle listed more than once is only fetched once (first occurrence
        wins, order is preserved).

        Returns:
            A list of ``https://<handle>.substack.com/feed`` URLs; empty when
            the option is unset or not a string.
        """
        handles = self.recipe_specific_options.get('auths')
        if not (handles and isinstance(handles, str)):
            return []
        # dict.fromkeys keeps insertion order while dropping repeats.
        unique = dict.fromkeys(h for h in re.split(r'[@,\s]+', handles) if h)
        return ['https://' + h + '.substack.com/feed' for h in unique]

    def preprocess_html(self, soup):
        """Rewrite the ``w_<digits>`` part of every image URL to the chosen width.

        The width comes from the ``res`` option.  Surrounding whitespace is
        ignored, and anything that is not a plain run of digits falls back to
        600 so that a mistyped value cannot corrupt the image URLs or be
        misread as a regex replacement template.

        Returns:
            The same ``soup`` object, modified in place.
        """
        width = '600'
        chosen = self.recipe_specific_options.get('res')
        if chosen and isinstance(chosen, str):
            chosen = chosen.strip()
            if re.fullmatch(r'[0-9]+', chosen):
                width = chosen

        replacement = 'w_' + width
        for img in soup.findAll('img', attrs={'src': True}):
            img['src'] = re.sub(r'w_\d+', replacement, img['src'])
        return soup
|