From 6fe43235a652fbe206230f894beb5ec8a71aa4d0 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sat, 13 Jul 2024 12:14:11 +0530
Subject: [PATCH] Update wsj.recipe
---
recipes/wsj.recipe | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe
index c0b42bdaee..4c69702b3c 100644
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@@ -13,7 +13,7 @@ past_edition = None
def media_bucket(x):
if x.get('type', '') == 'image':
- if x.get('subtype', '') == 'graphic':
+ if x.get('subtype', '') == 'graphic' or 'images.wsj.net' not in x['manifest-url']:
return '

{}
\n'.format(
x['manifest-url'], x['caption'] + ' ' + x['credit'] + ''
)
@@ -82,6 +82,7 @@ class WSJ(BasicNewsRecipe):
byl.insert(0, read)
url = soup.find('p', attrs={'id':'share-link'})
if url:
+ url.name = 'div'
url['title'] = self.tag_to_string(url).strip()
url.string = ''
panel = soup.find('panel', attrs={'id':'metadata'})
@@ -97,6 +98,11 @@ class WSJ(BasicNewsRecipe):
x.insert_after(BeautifulSoup(y, 'html.parser'))
return soup
+ def postprocess_html(self, soup, first_fetch):
+ for pan in soup.findAll('panel'):
+ pan.name = 'div'
+ return soup
+
if not past_edition:
def _download_cover(self):
import os
@@ -178,6 +184,6 @@ class WSJ(BasicNewsRecipe):
return BeautifulSoup(raw).prettify()
def populate_article_metadata(self, article, soup, first):
- lnk = soup.find('p', attrs={'id':'share-link'})
+ lnk = soup.find('div', attrs={'id':'share-link'})
if lnk:
article.url = lnk['title']