mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update substack.recipe
This commit is contained in:
parent
791f495389
commit
5f429c3a2d
@ -6,6 +6,7 @@
|
|||||||
# Copyright: Nathan Cook (nathan.cook@gmail.com)
|
# Copyright: Nathan Cook (nathan.cook@gmail.com)
|
||||||
##
|
##
|
||||||
# Written: 2020-12-18
|
# Written: 2020-12-18
|
||||||
|
# Updated: 2024-11-04
|
||||||
##
|
##
|
||||||
|
|
||||||
__license__ = 'GNU General Public License v3 – https://www.gnu.org/licenses/gpl-3.0.html'
|
__license__ = 'GNU General Public License v3 – https://www.gnu.org/licenses/gpl-3.0.html'
|
||||||
@ -14,6 +15,7 @@ __version__ = 'v0.1.1'
|
|||||||
__date__ = '2020-12-19'
|
__date__ = '2020-12-19'
|
||||||
__author__ = 'topynate'
|
__author__ = 'topynate'
|
||||||
|
|
||||||
|
import re
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
@ -22,20 +24,35 @@ from mechanize import Request
|
|||||||
|
|
||||||
class Substack(BasicNewsRecipe):
|
class Substack(BasicNewsRecipe):
|
||||||
title = 'Substack'
|
title = 'Substack'
|
||||||
__author__ = 'topynate'
|
__author__ = 'topynate, unkn0wn'
|
||||||
|
description = 'Use advanced menu if you want to add your own substack handles.'
|
||||||
oldest_article = 7
|
oldest_article = 7
|
||||||
language = 'en'
|
language = 'en'
|
||||||
max_articles_per_feed = 100
|
max_articles_per_feed = 100
|
||||||
auto_cleanup = True
|
auto_cleanup = True
|
||||||
|
auto_cleanup_keep = '//*[@class="subtitle"]'
|
||||||
needs_subscription = 'optional'
|
needs_subscription = 'optional'
|
||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
masthead_url = 'https://substack.com/img/substack_wordmark.png'
|
||||||
|
cover_url = 'https://substack.com/img/substack.png'
|
||||||
|
extra_css = '.captioned-image-container, .image-container {font-size: small;}'
|
||||||
|
|
||||||
recipe_specific_options = {
|
recipe_specific_options = {
|
||||||
|
'auths': {
|
||||||
|
'short': 'enter the @handles you subscribe to:\nseperated by a space',
|
||||||
|
'long': 'julianmacfarlane ianleslie .... ....',
|
||||||
|
'default': 'julianmacfarlane ianleslie thesalvo',
|
||||||
|
},
|
||||||
'days': {
|
'days': {
|
||||||
'short': 'Oldest article to download from this news source. In days ',
|
'short': 'Oldest article to download from this news source. In days ',
|
||||||
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
'long': 'For example, 0.5, gives you articles from the past 12 hours',
|
||||||
'default': str(oldest_article)
|
'default': str(oldest_article),
|
||||||
}
|
},
|
||||||
|
'res': {
|
||||||
|
'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
|
||||||
|
'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
|
||||||
|
'default': '600',
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -47,9 +64,9 @@ class Substack(BasicNewsRecipe):
|
|||||||
# Every Substack publication has an RSS feed at https://{name}.substack.com/feed.
|
# Every Substack publication has an RSS feed at https://{name}.substack.com/feed.
|
||||||
# The same URL provides either all posts, or all free posts + previews of paid posts,
|
# The same URL provides either all posts, or all free posts + previews of paid posts,
|
||||||
# depending on whether you're logged in.
|
# depending on whether you're logged in.
|
||||||
feeds = [
|
# feeds = [
|
||||||
('Novum Lumen', 'https://novumlumen.substack.com/feed'), # gratuitously self-promotional example
|
# ('Novum Lumen', 'https://novumlumen.substack.com/feed'), # gratuitously self-promotional example
|
||||||
]
|
# ]
|
||||||
|
|
||||||
def get_browser(self):
|
def get_browser(self):
|
||||||
br = BasicNewsRecipe.get_browser(self)
|
br = BasicNewsRecipe.get_browser(self)
|
||||||
@ -70,3 +87,24 @@ class Substack(BasicNewsRecipe):
|
|||||||
if res.getcode() != 200:
|
if res.getcode() != 200:
|
||||||
raise ValueError('Login failed, check username and password')
|
raise ValueError('Login failed, check username and password')
|
||||||
return br
|
return br
|
||||||
|
|
||||||
|
def get_feeds(self):
    """Build the list of feed URLs from the ``auths`` recipe option.

    ``auths`` is read from ``self.recipe_specific_options`` and is treated
    as a whitespace-separated string of Substack handles, each of which may
    carry an ``@`` prefix.

    Returns:
        A list of ``https://<handle>.substack.com/feed`` URLs, one per
        handle, in the order given. The list is empty when the option is
        missing, empty, or not a string.
    """
    handles = self.recipe_specific_options.get('auths')
    if not handles or not isinstance(handles, str):
        return []
    feeds = []
    for raw in handles.split():
        # Remove the '@' outright. Replacing it with a space (the previous
        # behaviour) put whitespace inside the hostname and produced an
        # unusable URL such as 'https:// name.substack.com/feed'.
        handle = raw.replace('@', '')
        if handle:  # a lone '@' leaves no handle to build a feed from
            feeds.append('https://' + handle + '.substack.com/feed')
    return feeds
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Adjust image widths and strip unwanted elements from a parsed page.

    The target width comes from the ``res`` entry of
    ``self.recipe_specific_options`` and falls back to ``'600'`` when that
    entry is missing or is not a plain decimal number.

    Args:
        soup: the parsed document; it is modified in place.

    Returns:
        The same ``soup`` object, after:
        * every ``w_<digits>`` token in an ``<img src>`` has been replaced
          with ``w_<width>``;
        * all ``<source>`` and ``<svg>`` elements have been removed;
        * all elements with class ``button-wrapper`` have been removed.
    """
    width = '600'
    chosen = self.recipe_specific_options.get('res')
    # Accept only ASCII digits: the value is spliced verbatim into every
    # image URL, so anything else would corrupt those URLs.
    if isinstance(chosen, str) and re.fullmatch(r'[0-9]+', chosen.strip()):
        width = chosen.strip()

    for img in soup.findAll('img', attrs={'src': True}):
        # \b restricts the match to a stand-alone "w_<n>" token, so that
        # names which merely contain it (e.g. "new_2024.png") are not
        # rewritten.
        img['src'] = re.sub(r'\bw_\d+', 'w_' + width, img['src'])

    for tag in soup.findAll(['source', 'svg']):
        tag.extract()

    for wrapper in soup.findAll(attrs={'class': ['button-wrapper']}):
        wrapper.extract()

    return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user