From e8d08955d61c87b493feebd8eb7bd6f752e05280 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Wed, 26 Jun 2024 18:54:39 +0530
Subject: [PATCH] Update WSJ
---
recipes/wsj.recipe | 21 ++++++++++++---------
recipes/wsj_mag.recipe | 14 ++++++++------
2 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe
index 958c041815..a0e2ac25e9 100644
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@@ -13,12 +13,16 @@ past_edition = None
def media_bucket(x):
if x.get('type', '') == 'image':
- return '
{}
\n'.format(
- x['manifest-url'], x['caption'] + ' ' + x['credit'] + ''
+ if x.get('subtype', '') == 'graphic':
+ return '

{}
\n'.format(
+ x['manifest-url'], x['caption'] + ' ' + x['credit'] + ''
+ )
+ return '

{}
\n'.format(
+ x['manifest-url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + ''
)
if x.get('type', '') == 'video':
- return '
{}
\n'.format(
- x['share_link'], x['thumbnail_url'], x['caption'] + ' ' + x['credit'] + ''
+ return '

{}
\n'.format(
+ x['share_link'], x['thumbnail_url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + ''
)
return
@@ -90,7 +94,6 @@ class WSJ(BasicNewsRecipe):
m_itm = soup.findAll('panel', attrs={'class':'media-item'})
if i_lst and m_itm:
for x, y in list(zip_longest(m_itm, i_lst)):
- x.name = 'p'
x.insert_after(BeautifulSoup(y, 'html.parser'))
return soup
@@ -141,9 +144,9 @@ class WSJ(BasicNewsRecipe):
break
dt = datetime.fromisoformat(date[:-1]) + timedelta(seconds=time.timezone)
- dt = dt.strftime('%b %d, %Y')
- self.log('Downloading ', dt)
- self.timefmt = ' [' + dt + ']'
+ dt_ = dt.strftime('%b %d, %Y')
+ self.log('Downloading ', dt_)
+ self.timefmt = ' [' + dt_ + ']'
feeds = []
@@ -153,7 +156,7 @@ class WSJ(BasicNewsRecipe):
if '-pages_' in k:
section = k.split('-pages_')[0].replace('_', ' ')
if 'MAGAZINE' in section:
- if not datetime.now().strftime("%d") == 1:
+ if dt.day != 1:  # compare ints: strftime('%d') yields a str like '01', which never equals 1
continue
self.log('Loading Magazine section')
self.log(section)
diff --git a/recipes/wsj_mag.recipe b/recipes/wsj_mag.recipe
index 718ee49958..0a82cab0ae 100644
--- a/recipes/wsj_mag.recipe
+++ b/recipes/wsj_mag.recipe
@@ -9,16 +9,19 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes
def media_bucket(x):
if x.get('type', '') == 'image':
- return '
{}
\n'.format(
- x['manifest-url'], x['caption'] + ' ' + x['credit'] + ''
+ if x.get('subtype', '') == 'graphic':
+ return '

{}
\n'.format(
+ x['manifest-url'], x['caption'] + ' ' + x['credit'] + ''
+ )
+ return '

{}
\n'.format(
+ x['manifest-url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + ''
)
if x.get('type', '') == 'video':
- return '
{}
\n'.format(
- x['share_link'], x['thumbnail_url'], x['caption'] + ' ' + x['credit'] + ''
+ return '

{}
\n'.format(
+ x['share_link'], x['thumbnail_url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + ''
)
return
-
class WSJ(BasicNewsRecipe):
title = 'WSJ. Magazine'
__author__ = 'unkn0wn'
@@ -87,7 +90,6 @@ class WSJ(BasicNewsRecipe):
m_itm = soup.findAll('panel', attrs={'class':'media-item'})
if i_lst and m_itm:
for x, y in list(zip_longest(m_itm, i_lst)):
- x.name = 'p'
x.insert_after(BeautifulSoup(y, 'html.parser'))
return soup