Update WSJ

2025-08-30 23:00:21 -04:00 · 2024-06-26 18:54:39 +05:30 · 2024-06-26 18:54:39 +05:30 · e8d08955d6
commit e8d08955d6
parent 62095818fa
2 changed files with 20 additions and 15 deletions
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@@ -13,12 +13,16 @@ past_edition = None
 def media_bucket(x):
    if x.get('type', '') == 'image':
-        return '<img src="{}"><div class="figc">{}</div>\n'.format(
+        if x.get('subtype', '') == 'graphic':
-            x['manifest-url'], x['caption'] + '<i> ' + x['credit'] + '</i>'
+            return '<br><img src="{}"><div class="figc">{}</div>\n'.format(
                x['manifest-url'], x['caption'] + '<i> ' + x['credit'] + '</i>'
            )
        return '<br><img src="{}"><div class="figc">{}</div>\n'.format(
            x['manifest-url'].split('?')[0] + '?width=600', x['caption'] + '<i> ' + x['credit'] + '</i>'
        )
    if x.get('type', '') == 'video':
-        return '<a href="{}"><img src="{}"></a><div class="figc">{}</div>\n'.format(
+        return '<br><a href="{}"><img src="{}"></a><div class="figc">{}</div>\n'.format(
-            x['share_link'], x['thumbnail_url'], x['caption'] + '<i> ' + x['credit'] + '</i>'
+            x['share_link'], x['thumbnail_url'].split('?')[0] + '?width=600', x['caption'] + '<i> ' + x['credit'] + '</i>'
        )
    return
@@ -90,7 +94,6 @@ class WSJ(BasicNewsRecipe):
                m_itm = soup.findAll('panel', attrs={'class':'media-item'})
                if i_lst and m_itm:
                    for x, y in list(zip_longest(m_itm, i_lst)):
                        x.name = 'p'
                        x.insert_after(BeautifulSoup(y, 'html.parser'))
        return soup
@@ -141,9 +144,9 @@ class WSJ(BasicNewsRecipe):
                break
        dt = datetime.fromisoformat(date[:-1]) + timedelta(seconds=time.timezone)
-        dt = dt.strftime('%b %d, %Y')
+        dt_ = dt.strftime('%b %d, %Y')
-        self.log('Downloading ', dt)
+        self.log('Downloading ', dt_)
-        self.timefmt = ' [' + dt + ']'
+        self.timefmt = ' [' + dt_ + ']'
        feeds = []
@@ -153,7 +156,7 @@ class WSJ(BasicNewsRecipe):
                if '-pages_' in k:
                    section = k.split('-pages_')[0].replace('_', ' ')
                    if 'MAGAZINE' in section:
-                        if not datetime.now().strftime("%d") == 1:
+                        if not dt.strftime('%d') == 1:
                            continue
                        self.log('Loading Magazine section')
                    self.log(section)
--- a/recipes/wsj_mag.recipe
+++ b/recipes/wsj_mag.recipe
@@ -9,16 +9,19 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes
 def media_bucket(x):
    if x.get('type', '') == 'image':
-        return '<img src="{}"><div class="figc">{}</div>\n'.format(
+        if x.get('subtype', '') == 'graphic':
-            x['manifest-url'], x['caption'] + '<i> ' + x['credit'] + '</i>'
+            return '<br><img src="{}"><div class="figc">{}</div>\n'.format(
                x['manifest-url'], x['caption'] + '<i> ' + x['credit'] + '</i>'
            )
        return '<br><img src="{}"><div class="figc">{}</div>\n'.format(
            x['manifest-url'].split('?')[0] + '?width=600', x['caption'] + '<i> ' + x['credit'] + '</i>'
        )
    if x.get('type', '') == 'video':
-        return '<a href="{}"><img src="{}"></a><div class="figc">{}</div>\n'.format(
+        return '<br><a href="{}"><img src="{}"></a><div class="figc">{}</div>\n'.format(
-            x['share_link'], x['thumbnail_url'], x['caption'] + '<i> ' + x['credit'] + '</i>'
+            x['share_link'], x['thumbnail_url'].split('?')[0] + '?width=600', x['caption'] + '<i> ' + x['credit'] + '</i>'
        )
    return
 class WSJ(BasicNewsRecipe):
    title = 'WSJ. Magazine'
    __author__ = 'unkn0wn'
@@ -87,7 +90,6 @@ class WSJ(BasicNewsRecipe):
                m_itm = soup.findAll('panel', attrs={'class':'media-item'})
                if i_lst and m_itm:
                    for x, y in list(zip_longest(m_itm, i_lst)):
                        x.name = 'p'
                        x.insert_after(BeautifulSoup(y, 'html.parser'))
        return soup