From e8d08955d61c87b493feebd8eb7bd6f752e05280 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Wed, 26 Jun 2024 18:54:39 +0530
Subject: [PATCH 1/4] Update WSJ
---
recipes/wsj.recipe | 21 ++++++++++++---------
recipes/wsj_mag.recipe | 14 ++++++++------
2 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe
index 958c041815..a0e2ac25e9 100644
--- a/recipes/wsj.recipe
+++ b/recipes/wsj.recipe
@@ -13,12 +13,16 @@ past_edition = None
def media_bucket(x):
if x.get('type', '') == 'image':
- return '
{}
\n'.format(
- x['manifest-url'], x['caption'] + ' ' + x['credit'] + ''
+ if x.get('subtype', '') == 'graphic':
+ return '

{}
\n'.format(
+ x['manifest-url'], x['caption'] + ' ' + x['credit'] + ''
+ )
+ return '

{}
\n'.format(
+ x['manifest-url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + ''
)
if x.get('type', '') == 'video':
- return '
{}
\n'.format(
- x['share_link'], x['thumbnail_url'], x['caption'] + ' ' + x['credit'] + ''
+ return '

{}
\n'.format(
+ x['share_link'], x['thumbnail_url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + ''
)
return
@@ -90,7 +94,6 @@ class WSJ(BasicNewsRecipe):
m_itm = soup.findAll('panel', attrs={'class':'media-item'})
if i_lst and m_itm:
for x, y in list(zip_longest(m_itm, i_lst)):
- x.name = 'p'
x.insert_after(BeautifulSoup(y, 'html.parser'))
return soup
@@ -141,9 +144,9 @@ class WSJ(BasicNewsRecipe):
break
dt = datetime.fromisoformat(date[:-1]) + timedelta(seconds=time.timezone)
- dt = dt.strftime('%b %d, %Y')
- self.log('Downloading ', dt)
- self.timefmt = ' [' + dt + ']'
+ dt_ = dt.strftime('%b %d, %Y')
+ self.log('Downloading ', dt_)
+ self.timefmt = ' [' + dt_ + ']'
feeds = []
@@ -153,7 +156,7 @@ class WSJ(BasicNewsRecipe):
if '-pages_' in k:
section = k.split('-pages_')[0].replace('_', ' ')
if 'MAGAZINE' in section:
- if not datetime.now().strftime("%d") == 1:
+ if dt.day != 1:  # strftime('%d') yields a str, which never equals the int 1
continue
self.log('Loading Magazine section')
self.log(section)
diff --git a/recipes/wsj_mag.recipe b/recipes/wsj_mag.recipe
index 718ee49958..0a82cab0ae 100644
--- a/recipes/wsj_mag.recipe
+++ b/recipes/wsj_mag.recipe
@@ -9,16 +9,19 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes
def media_bucket(x):
if x.get('type', '') == 'image':
- return '
{}
\n'.format(
- x['manifest-url'], x['caption'] + ' ' + x['credit'] + ''
+ if x.get('subtype', '') == 'graphic':
+ return '

{}
\n'.format(
+ x['manifest-url'], x['caption'] + ' ' + x['credit'] + ''
+ )
+ return '

{}
\n'.format(
+ x['manifest-url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + ''
)
if x.get('type', '') == 'video':
- return '
{}
\n'.format(
- x['share_link'], x['thumbnail_url'], x['caption'] + ' ' + x['credit'] + ''
+ return '

{}
\n'.format(
+ x['share_link'], x['thumbnail_url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + ''
)
return
-
class WSJ(BasicNewsRecipe):
title = 'WSJ. Magazine'
__author__ = 'unkn0wn'
@@ -87,7 +90,6 @@ class WSJ(BasicNewsRecipe):
m_itm = soup.findAll('panel', attrs={'class':'media-item'})
if i_lst and m_itm:
for x, y in list(zip_longest(m_itm, i_lst)):
- x.name = 'p'
x.insert_after(BeautifulSoup(y, 'html.parser'))
return soup
From 03a4c71dad19571b0ec54605f6e2e6bc80f03ad9 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Wed, 26 Jun 2024 18:55:28 +0530
Subject: [PATCH 2/4] Create Times Literary Supplement
---
recipes/icons/tls_mag.png | Bin 0 -> 234 bytes
recipes/tls_mag.recipe | 114 ++++++++++++++++++++++++++++++++++++++
2 files changed, 114 insertions(+)
create mode 100644 recipes/icons/tls_mag.png
create mode 100644 recipes/tls_mag.recipe
diff --git a/recipes/icons/tls_mag.png b/recipes/icons/tls_mag.png
new file mode 100644
index 0000000000000000000000000000000000000000..de3e20b53107d775e74163b7841081be13affc4b
GIT binary patch
literal 234
zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbK}V1Q4E>;M1%fy^&ozRsIBx3#sk
zy1MGXf&J0Zk+HEcTefWZ^XHGBpRcpClbM;xt5+}Iy?d9Qo>o*;xMay<8yl-jmoDA7
zaedXQl@A^~m^EwW$&)Ah`}?1@R5<`m4Dobv43U_+_WX9S1_cr33m-C?STh&>``;dw
zkf{FH_M||9$CPzopr07Erk>i_@%
literal 0
HcmV?d00001
diff --git a/recipes/tls_mag.recipe b/recipes/tls_mag.recipe
new file mode 100644
index 0000000000..274a70485a
--- /dev/null
+++ b/recipes/tls_mag.recipe
@@ -0,0 +1,114 @@
+import json, re
+from calibre import browser
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import BeautifulSoup
+
+
+def re_html(y):
+    parsed = BeautifulSoup(y.rstrip(), "html.parser")  # y is markup text; trailing whitespace is dropped first
+    return parsed.text  # .text of the parsed fragment
+
+def get_cont(x):
+ url = x['url']
+ title = x['headline']
+ desc = x['standfirst']
+ if x['byline']['text']:
+ desc = 'By ' + x['byline']['text'] + ' | ' + desc
+ print(' ', re_html(title), '\n\t', re_html(desc), '\n\t', url)
+ return ({ 'title': re_html(title), 'description': re_html(desc), 'url': url })
+
+def get_id(url):
+    rq = browser().open(url)  # the id is pulled from str(rq.info()), not from the page body
+    return re.search(r'\?p=(\S+)>', str(rq.info())).group(1)  # raw string: '\?' and '\S' are regex escapes, not str escapes
+
+
+class tls(BasicNewsRecipe):
+ title = 'Times Literary Supplement'
+ description = (
+ 'TLS, world’s leading journal for literature and ideas. Every week, we publish book reviews, book extracts, '
+ 'essays and poems from leading writers from around the world. We cover far more than just literature, featuring '
+ 'major articles on subjects from anthropology to zoology, philosophy to politics, comedy to psychology. Each week, '
+ 'we also review the latest in fiction, film, opera, theatre, dance, radio and television.'
+ )
+ encoding = 'utf-8'
+ language = 'en_GB'
+ masthead_url = 'https://www.the-tls.co.uk/wp-content/uploads/sites/7/2019/11/Smaller-Logo.jpg'
+
+ extra_css = '''
+ .label { font-size:small; color:#404040; }
+ .figc { font-size:small; text-align:center; }
+ .desc { font-style:italic; color:#202020; }
+ .auth { font-size:small; }
+ em, blockquote { color:#202020; }
+ .det { font-size:small; color:#202020; }
+ '''
+
+ def parse_index(self):
+ issue = 'https://www.the-tls.co.uk/issues/current-issue/'
+ url = 'https://www.the-tls.co.uk/wp-json/tls/v2/contents-page/' + get_id(issue)
+ raw = self.index_to_soup(url, raw=True)
+ data = json.loads(raw)
+ self.cover_url = data['featuredimage']['full_image'] + '?w600'
+ self.timefmt = ' [' + data['issuedateline']['issuedate'] + ']'
+ self.description = 'Issue ' + data['issuedateline']['issuenumber']
+
+ feeds = []
+
+ self.log('A note from the Editor')
+ feeds.append(('A note from the Editor', [get_cont(data['featuredarticle'])]))
+
+ cont = data['contents']
+ for c in cont:
+ section = re_html(cont[c]['articleheader']['title'])
+ self.log(section)
+ articles = []
+ for arts in cont[c]['articleslist']:
+ articles.append(get_cont(arts))
+ if articles:
+ feeds.append((section, articles))
+ return feeds
+
+ def print_version(self, url):
+ return 'https://www.the-tls.co.uk/wp-json/tls/v2/single-article/' + get_id(url)
+
+ def preprocess_raw_html(self, raw, *a):
+ data = json.loads(raw)
+ prim = data['articleIntroPrimary']
+ title = '' + prim['headline'] + '
\n'
+ desc = '' + prim['standfirst'] + '
\n'
+
+ auth = lede = ''
+
+ label = '{}
\n'
+ if prim['label']['category']['text']:
+ label = label.format(prim['label']['articletype'] + ' | ' + prim['label']['category']['text'])
+ else:
+ label = label.format(prim['label']['articletype'])
+
+ if prim['byline']['text']:
+ auth = ''.format(prim['byline']['link']) + prim['byline']['text'] + '
\n'
+
+ bks = ''
+ if data['bookdetails']:
+ for a in data['bookdetails']:
+ bks += '
'
+ for x, y in a.items():
+ if isinstance(y, str):
+ if x == 'imageurl':
+ bks += '
'.format(y)
+ elif y:
+ bks += '' + y + '
\n'
+ bks += '
'
+
+ if 'full_image' in data['leadimage'] and data['leadimage']['full_image']:
+ lede = '

{}
'.format(
+ data['leadimage']['full_image'] + '?w600', data['leadimage']['imagecaption'] + ' ' \
+ + data['leadimage']['imagecredit'] + ''
+ )
+
+ body = data['content']
+
+ html = '' \
+ + label + title + desc + auth + lede + bks + body + \
+ '
'
+ return BeautifulSoup(html).prettify()
From 616c938f829758a8d92bd990f6365d6eaf39585d Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Wed, 26 Jun 2024 18:56:52 +0530
Subject: [PATCH 3/4] tls_mag.recipe: add __author__
---
recipes/tls_mag.recipe | 1 +
1 file changed, 1 insertion(+)
diff --git a/recipes/tls_mag.recipe b/recipes/tls_mag.recipe
index 274a70485a..d0c0d6a502 100644
--- a/recipes/tls_mag.recipe
+++ b/recipes/tls_mag.recipe
@@ -24,6 +24,7 @@ def get_id(url):
class tls(BasicNewsRecipe):
title = 'Times Literary Supplement'
+ __author__ = 'unkn0wn'
description = (
'TLS, world’s leading journal for literature and ideas. Every week, we publish book reviews, book extracts, '
'essays and poems from leading writers from around the world. We cover far more than just literature, featuring '
From a9b085ef5eaa49ad0fad78e1bb510b45755c8f41 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Wed, 26 Jun 2024 18:57:47 +0530
Subject: [PATCH 4/4] Update tls_mag.recipe
---
recipes/tls_mag.recipe | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/recipes/tls_mag.recipe b/recipes/tls_mag.recipe
index d0c0d6a502..15dd30cd43 100644
--- a/recipes/tls_mag.recipe
+++ b/recipes/tls_mag.recipe
@@ -24,7 +24,7 @@ def get_id(url):
class tls(BasicNewsRecipe):
title = 'Times Literary Supplement'
- __author__ = 'unkn0wn'
+ __author__ = 'unkn0wn'
description = (
'TLS, world’s leading journal for literature and ideas. Every week, we publish book reviews, book extracts, '
'essays and poems from leading writers from around the world. We cover far more than just literature, featuring '