From e8d08955d61c87b493feebd8eb7bd6f752e05280 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:54:39 +0530 Subject: [PATCH] Update WSJ --- recipes/wsj.recipe | 21 ++++++++++++--------- recipes/wsj_mag.recipe | 14 ++++++++------ 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe index 958c041815..a0e2ac25e9 100644 --- a/recipes/wsj.recipe +++ b/recipes/wsj.recipe @@ -13,12 +13,16 @@ past_edition = None def media_bucket(x): if x.get('type', '') == 'image': - return '
{}
\n'.format( - x['manifest-url'], x['caption'] + ' ' + x['credit'] + '' + if x.get('subtype', '') == 'graphic': + return '
{}
\n'.format( + x['manifest-url'], x['caption'] + ' ' + x['credit'] + '' + ) + return '
{}
\n'.format( + x['manifest-url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + '' ) if x.get('type', '') == 'video': - return '
{}
\n'.format( - x['share_link'], x['thumbnail_url'], x['caption'] + ' ' + x['credit'] + '' + return '
{}
\n'.format( + x['share_link'], x['thumbnail_url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + '' ) return @@ -90,7 +94,6 @@ class WSJ(BasicNewsRecipe): m_itm = soup.findAll('panel', attrs={'class':'media-item'}) if i_lst and m_itm: for x, y in list(zip_longest(m_itm, i_lst)): - x.name = 'p' x.insert_after(BeautifulSoup(y, 'html.parser')) return soup @@ -141,9 +144,9 @@ class WSJ(BasicNewsRecipe): break dt = datetime.fromisoformat(date[:-1]) + timedelta(seconds=time.timezone) - dt = dt.strftime('%b %d, %Y') - self.log('Downloading ', dt) - self.timefmt = ' [' + dt + ']' + dt_ = dt.strftime('%b %d, %Y') + self.log('Downloading ', dt_) + self.timefmt = ' [' + dt_ + ']' feeds = [] @@ -153,7 +156,7 @@ class WSJ(BasicNewsRecipe): if '-pages_' in k: section = k.split('-pages_')[0].replace('_', ' ') if 'MAGAZINE' in section: - if not datetime.now().strftime("%d") == 1: + if not dt.strftime('%d') == 1: continue self.log('Loading Magazine section') self.log(section) diff --git a/recipes/wsj_mag.recipe b/recipes/wsj_mag.recipe index 718ee49958..0a82cab0ae 100644 --- a/recipes/wsj_mag.recipe +++ b/recipes/wsj_mag.recipe @@ -9,16 +9,19 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes def media_bucket(x): if x.get('type', '') == 'image': - return '
{}
\n'.format( - x['manifest-url'], x['caption'] + ' ' + x['credit'] + '' + if x.get('subtype', '') == 'graphic': + return '
{}
\n'.format( + x['manifest-url'], x['caption'] + ' ' + x['credit'] + '' + ) + return '
{}
\n'.format( + x['manifest-url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + '' ) if x.get('type', '') == 'video': - return '
{}
\n'.format( - x['share_link'], x['thumbnail_url'], x['caption'] + ' ' + x['credit'] + '' + return '
{}
\n'.format( + x['share_link'], x['thumbnail_url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + '' ) return - class WSJ(BasicNewsRecipe): title = 'WSJ. Magazine' __author__ = 'unkn0wn' @@ -87,7 +90,6 @@ class WSJ(BasicNewsRecipe): m_itm = soup.findAll('panel', attrs={'class':'media-item'}) if i_lst and m_itm: for x, y in list(zip_longest(m_itm, i_lst)): - x.name = 'p' x.insert_after(BeautifulSoup(y, 'html.parser')) return soup