From 6fe43235a652fbe206230f894beb5ec8a71aa4d0 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Sat, 13 Jul 2024 12:14:11 +0530 Subject: [PATCH] Update wsj.recipe --- recipes/wsj.recipe | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe index c0b42bdaee..4c69702b3c 100644 --- a/recipes/wsj.recipe +++ b/recipes/wsj.recipe @@ -13,7 +13,7 @@ past_edition = None def media_bucket(x): if x.get('type', '') == 'image': - if x.get('subtype', '') == 'graphic': + if x.get('subtype', '') == 'graphic' or 'images.wsj.net' not in x['manifest-url']: return '
{}
\n'.format( x['manifest-url'], x['caption'] + ' ' + x['credit'] + '' ) @@ -82,6 +82,7 @@ class WSJ(BasicNewsRecipe): byl.insert(0, read) url = soup.find('p', attrs={'id':'share-link'}) if url: + url.name = 'div' url['title'] = self.tag_to_string(url).strip() url.string = '' panel = soup.find('panel', attrs={'id':'metadata'}) @@ -97,6 +98,11 @@ class WSJ(BasicNewsRecipe): x.insert_after(BeautifulSoup(y, 'html.parser')) return soup + def postprocess_html(self, soup, first_fetch): + for pan in soup.findAll('panel'): + pan.name = 'div' + return soup + if not past_edition: def _download_cover(self): import os @@ -178,6 +184,6 @@ class WSJ(BasicNewsRecipe): return BeautifulSoup(raw).prettify() def populate_article_metadata(self, article, soup, first): - lnk = soup.find('p', attrs={'id':'share-link'}) + lnk = soup.find('div', attrs={'id':'share-link'}) if lnk: article.url = lnk['title']