mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-31 10:37:00 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			127 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			127 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python
 | ||
| # vim:fileencoding=utf-8
 | ||
| #
 | ||
| # Title:        Substack
 | ||
| # License:      GNU General Public License v3 – https://www.gnu.org/licenses/gpl-3.0.html
 | ||
| # Copyright:    Nathan Cook (nathan.cook@gmail.com)
 | ||
| ##
 | ||
| # Written:      2020-12-18
 | ||
| # Updated:      2024-11-04
 | ||
| ##
 | ||
| 
 | ||
| __license__ = 'GNU General Public License v3 – https://www.gnu.org/licenses/gpl-3.0.html'
 | ||
| __copyright__ = 'Nathan Cook – 2020-12-19'
 | ||
| __version__ = 'v0.1.1'
 | ||
| __date__ = '2020-12-19'
 | ||
| __author__ = 'topynate'
 | ||
| 
 | ||
| import json
 | ||
| import re
 | ||
| 
 | ||
| from mechanize import Request
 | ||
| 
 | ||
| from calibre.web.feeds.news import BasicNewsRecipe, classes
 | ||
| 
 | ||
| 
 | ||
class Substack(BasicNewsRecipe):
    """Recipe that downloads recent posts from a list of Substack publications.

    The publications are given as ``@handle`` names through the ``auths``
    recipe-specific option; each handle is turned into the RSS feed URL
    ``https://<handle>.substack.com/feed``.  Logging in is optional.
    """

    title = 'Substack'
    __author__ = 'topynate, unkn0wn'
    description = 'Use advanced menu if you want to add your own substack handles.'
    oldest_article = 7
    language = 'en'
    max_articles_per_feed = 100
    needs_subscription = 'optional'
    use_embedded_content = False
    masthead_url = 'https://substack.com/img/substack_wordmark.png'
    cover_url = 'https://substack.com/img/substack.png'
    extra_css = '.captioned-image-container, .image-container, .image-caption {font-size: small;}'
    remove_empty_feeds = True
    remove_attributes = ['style', 'height', 'width']
    no_stylesheets = True

    keep_only_tags = [
        classes('post-title post-subtitle subtitle available-content')
    ]

    remove_tags = [
        dict(name=['svg', 'source']),
        classes('subscribe-widget button-wrapper')
    ]

    # Declarations of the user-tunable options.  At class level every entry is
    # a dict of help texts plus a default; the methods below only act on an
    # option when its value is a non-empty string.
    # NOTE(review): presumably BasicNewsRecipe.__init__ replaces this mapping
    # on the instance with plain string values -- confirm against calibre.
    recipe_specific_options = {
        'auths': {
            'short': 'enter the @handles you subscribe to:\nseperated by a space',
            'long': '@julianmacfarlane @simplicius76 .... ....',
            'default': '@julianmacfarlane @simplicius76 @caitlinjohnstone @michaelmoore @seymourhersh @geopolitiq',
        },
        'days': {
            'short': 'Oldest article to download from this news source. In days ',
            'long': 'For example, 0.5, gives you articles from the past 12 hours',
            'default': str(oldest_article),
        },
        'res': {
            'short': 'For hi-res images, select a resolution from the\nfollowing options: 800, 1000, 1200 or 1500',
            'long': 'This is useful for non e-ink devices, and for a lower file size\nthan the default, use 400 or 300.',
            'default': '600',
        },
        'rev': {
            'short': 'Reverse the order of articles in each feed?',
            'long': 'enter yes',
            'default': 'no',
        },
    }

    def __init__(self, *args, **kwargs):
        """Initialise the recipe and apply the ``days`` and ``rev`` options.

        Raises:
            ValueError: if the ``days`` option is a string that cannot be
                parsed as a number.
        """
        BasicNewsRecipe.__init__(self, *args, **kwargs)

        days = self.recipe_specific_options.get('days')
        if days and isinstance(days, str):
            try:
                self.oldest_article = float(days)
            except ValueError as err:
                # Same exception type as before, but say which option is wrong.
                raise ValueError(
                    'Invalid value for the "days" option: {!r} '
                    '(expected a number such as 7 or 0.5)'.format(days)
                ) from err

        reverse = self.recipe_specific_options.get('rev')
        if reverse and isinstance(reverse, str) and reverse.lower().strip() == 'yes':
            self.reverse_article_order = True

    # Every Substack publication has an RSS feed at https://{name}.substack.com/feed.
    # The same URL provides either all posts, or all free posts + previews of paid posts,
    # depending on whether you're logged in.
    # Example of a hard-coded feed list (unused; feeds come from get_feeds()):
    # feeds          = [
    #     ('Novum Lumen', 'https://novumlumen.substack.com/feed'),    # gratuitously self-promotional example
    # ]

    def get_browser(self):
        """Return the browser, logged in to Substack when credentials are set.

        The login is skipped unless both a username and a password are
        provided; since the subscription is optional, empty credentials mean
        "browse anonymously" rather than "try to log in with nothing".

        Raises:
            ValueError: if the login request does not answer with HTTP 200.
        """
        br = BasicNewsRecipe.get_browser(self)
        if not (self.username and self.password):
            return br

        login_page = 'https://substack.com/account/login?redirect=%2F&email=&with_password='
        # Visit the login page first, then post the credentials as JSON to
        # the API endpoint with that page as the referer.
        br.open(login_page)
        payload = json.dumps({
            'email': self.username,
            'password': self.password,
            'captcha_response': None,
        })
        req = Request(
            url='https://substack.com/api/v1/email-login',
            headers={
                'Accept': '*/*',
                'Content-Type': 'application/json',
                'Origin': 'https://substack.com',
                'Referer': login_page,
            },
            data=payload,
            method='POST')
        res = br.open(req)
        if res.getcode() != 200:
            raise ValueError('Login failed, check username and password')
        return br

    def get_feeds(self):
        """Build the feed URL list from the ``auths`` option.

        Handles may be separated by whitespace and/or commas and may carry a
        leading ``@``.  Repeated handles are collapsed, keeping first-seen
        order, so the same publication is not fetched twice.

        Returns:
            A list of ``https://<handle>.substack.com/feed`` URLs; empty when
            the option holds no usable string.
        """
        raw = self.recipe_specific_options.get('auths')
        if not (raw and isinstance(raw, str)):
            return []
        # dict.fromkeys de-duplicates while preserving insertion order.
        handles = dict.fromkeys(h for h in re.split(r'[@,\s]+', raw) if h)
        return ['https://' + handle + '.substack.com/feed' for handle in handles]

    def preprocess_html(self, soup):
        """Rewrite the ``w_<digits>`` part of image URLs to the chosen width.

        The width comes from the ``res`` option.  It is used only when it is
        made of ASCII digits once surrounding whitespace is stripped; anything
        else falls back to 600 so that a typo cannot corrupt the image URLs.

        Returns:
            The same ``soup`` object, modified in place.
        """
        width = '600'
        requested = self.recipe_specific_options.get('res')
        if requested and isinstance(requested, str):
            requested = requested.strip()
            if re.fullmatch(r'[0-9]+', requested):
                width = requested

        # Digits only, so the replacement cannot contain regex escapes.
        replacement = 'w_' + width
        for img in soup.findAll('img', attrs={'src': True}):
            img['src'] = re.sub(r'w_\d+', replacement, img['src'])
        return soup
 |