From d62dab1e2e7c1f30ffb29049caa9381ba9d31a20 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 18 Oct 2023 06:41:13 +0530
Subject: [PATCH] Make subscription for WSJ optional, as the recipe currently
 uses archive.is

---
 recipes/wsj.recipe      | 12 ++++++------
 recipes/wsj_free.recipe | 14 +++++++-------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe
index c39565c6d1..fa9377c7e0 100644
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@@ -10,7 +10,8 @@ from base64 import standard_b64encode
 from datetime import date, timedelta
 from mechanize import Request
 
-from calibre.web.feeds.news import BasicNewsRecipe, classes
+from calibre.ptempfile import PersistentTemporaryFile
+from calibre.web.feeds.news import BasicNewsRecipe
 from css_selectors import Select
 
 try:
@@ -22,10 +23,8 @@ try:
 except ImportError:
     from urllib import quote
 
-from calibre.scraper.simple import read_url
-from calibre.ptempfile import PersistentTemporaryFile
 
-needs_subscription = True
+needs_subscription = 'optional'
 
 
 def substring_classes(classes):
@@ -84,6 +83,7 @@ class WSJ(BasicNewsRecipe):
 
     articles_are_obfuscated = True
     def get_obfuscated_article(self, url):
+        from calibre.scraper.simple import read_url
         br = self.get_browser()
         br.set_handle_redirect(False)
         try:
@@ -96,7 +96,7 @@ class WSJ(BasicNewsRecipe):
         pt.close()
         return pt.name
 
-   def preprocess_html(self, soup):                     
+    def preprocess_html(self, soup):
         for img in soup.findAll('img', attrs={'old-src':True}):
             img['src'] = img['old-src']
         for p in soup.findAll('div', attrs={'data-type':['paragraph', 'image']}):
@@ -124,7 +124,7 @@ class WSJ(BasicNewsRecipe):
         br.set_cookie('ccpaApplies', 'false', '.wsj.com')
         return br
 
-    if needs_subscription:
+    if False and needs_subscription:  # disabled as we currently use archive.is
         def get_browser(self, *a, **kw):
             from pprint import pprint
             pprint
diff --git a/recipes/wsj_free.recipe b/recipes/wsj_free.recipe
index 97bf2064e1..3099d9fff8 100644
--- a/recipes/wsj_free.recipe
+++ b/recipes/wsj_free.recipe
@@ -10,7 +10,8 @@ from base64 import standard_b64encode
 from datetime import date, timedelta
 from mechanize import Request
 
-from calibre.web.feeds.news import BasicNewsRecipe, classes
+from calibre.ptempfile import PersistentTemporaryFile
+from calibre.web.feeds.news import BasicNewsRecipe
 from css_selectors import Select
 
 try:
@@ -22,8 +23,6 @@ try:
 except ImportError:
     from urllib import quote
 
-from calibre.scraper.simple import read_url
-from calibre.ptempfile import PersistentTemporaryFile
 
 needs_subscription = False
 
@@ -67,13 +66,14 @@ class WSJ(BasicNewsRecipe):
         #big-top-caption { font-size:small; text-align:center; }
         [data-type:"tagline"] { font-style:italic; color:#202020; }
     '''
-    
+
     keep_only_tags = [
         dict(name=['h1', 'h2']),
         dict(attrs={'aria-describedby':'big-top-caption'}),
         dict(attrs={'id':'big-top-caption'}),
         dict(name='article')
     ]
+
     remove_tags = [
         dict(name=['button', 'svg', 'ufc-follow-author-widget']),
         dict(attrs={'aria-label':['Sponsored Offers', 'Listen To Article', 'What to Read Next']}),
@@ -83,6 +83,7 @@ class WSJ(BasicNewsRecipe):
 
     articles_are_obfuscated = True
     def get_obfuscated_article(self, url):
+        from calibre.scraper.simple import read_url
         br = self.get_browser()
         br.set_handle_redirect(False)
         try:
@@ -95,7 +96,7 @@ class WSJ(BasicNewsRecipe):
         pt.close()
         return pt.name
 
-    def preprocess_html(self, soup):                     
+    def preprocess_html(self, soup):
         for img in soup.findAll('img', attrs={'old-src':True}):
             img['src'] = img['old-src']
         for p in soup.findAll('div', attrs={'data-type':['paragraph', 'image']}):
@@ -114,7 +115,6 @@ class WSJ(BasicNewsRecipe):
                 h2.extract()
         return soup
 
-
     # login {{{
 
     def get_browser_for_wsj(self, *a, **kw):
@@ -124,7 +124,7 @@ class WSJ(BasicNewsRecipe):
         br.set_cookie('ccpaApplies', 'false', '.wsj.com')
         return br
 
-    if needs_subscription:
+    if False and needs_subscription:  # disabled as we currently use archive.is
         def get_browser(self, *a, **kw):
             from pprint import pprint
             pprint