From e8d08955d61c87b493feebd8eb7bd6f752e05280 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:54:39 +0530 Subject: [PATCH 1/4] Update WSJ --- recipes/wsj.recipe | 21 ++++++++++++--------- recipes/wsj_mag.recipe | 14 ++++++++------ 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe index 958c041815..a0e2ac25e9 100644 --- a/recipes/wsj.recipe +++ b/recipes/wsj.recipe @@ -13,12 +13,16 @@ past_edition = None def media_bucket(x): if x.get('type', '') == 'image': - return '
{}
\n'.format( - x['manifest-url'], x['caption'] + ' ' + x['credit'] + '' + if x.get('subtype', '') == 'graphic': + return '
{}
\n'.format( + x['manifest-url'], x['caption'] + ' ' + x['credit'] + '' + ) + return '
{}
\n'.format( + x['manifest-url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + '' ) if x.get('type', '') == 'video': - return '
{}
\n'.format( - x['share_link'], x['thumbnail_url'], x['caption'] + ' ' + x['credit'] + '' + return '
{}
\n'.format( + x['share_link'], x['thumbnail_url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + '' ) return @@ -90,7 +94,6 @@ class WSJ(BasicNewsRecipe): m_itm = soup.findAll('panel', attrs={'class':'media-item'}) if i_lst and m_itm: for x, y in list(zip_longest(m_itm, i_lst)): - x.name = 'p' x.insert_after(BeautifulSoup(y, 'html.parser')) return soup @@ -141,9 +144,9 @@ class WSJ(BasicNewsRecipe): break dt = datetime.fromisoformat(date[:-1]) + timedelta(seconds=time.timezone) - dt = dt.strftime('%b %d, %Y') - self.log('Downloading ', dt) - self.timefmt = ' [' + dt + ']' + dt_ = dt.strftime('%b %d, %Y') + self.log('Downloading ', dt_) + self.timefmt = ' [' + dt_ + ']' feeds = [] @@ -153,7 +156,7 @@ class WSJ(BasicNewsRecipe): if '-pages_' in k: section = k.split('-pages_')[0].replace('_', ' ') if 'MAGAZINE' in section: - if not datetime.now().strftime("%d") == 1: + if not dt.strftime('%d') == 1: continue self.log('Loading Magazine section') self.log(section) diff --git a/recipes/wsj_mag.recipe b/recipes/wsj_mag.recipe index 718ee49958..0a82cab0ae 100644 --- a/recipes/wsj_mag.recipe +++ b/recipes/wsj_mag.recipe @@ -9,16 +9,19 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes def media_bucket(x): if x.get('type', '') == 'image': - return '
{}
\n'.format( - x['manifest-url'], x['caption'] + ' ' + x['credit'] + '' + if x.get('subtype', '') == 'graphic': + return '
{}
\n'.format( + x['manifest-url'], x['caption'] + ' ' + x['credit'] + '' + ) + return '
{}
\n'.format( + x['manifest-url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + '' ) if x.get('type', '') == 'video': - return '
{}
\n'.format( - x['share_link'], x['thumbnail_url'], x['caption'] + ' ' + x['credit'] + '' + return '
{}
\n'.format( + x['share_link'], x['thumbnail_url'].split('?')[0] + '?width=600', x['caption'] + ' ' + x['credit'] + '' ) return - class WSJ(BasicNewsRecipe): title = 'WSJ. Magazine' __author__ = 'unkn0wn' @@ -87,7 +90,6 @@ class WSJ(BasicNewsRecipe): m_itm = soup.findAll('panel', attrs={'class':'media-item'}) if i_lst and m_itm: for x, y in list(zip_longest(m_itm, i_lst)): - x.name = 'p' x.insert_after(BeautifulSoup(y, 'html.parser')) return soup From 03a4c71dad19571b0ec54605f6e2e6bc80f03ad9 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:55:28 +0530 Subject: [PATCH 2/4] Create Times Literary Supplement --- recipes/icons/tls_mag.png | Bin 0 -> 234 bytes recipes/tls_mag.recipe | 114 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 recipes/icons/tls_mag.png create mode 100644 recipes/tls_mag.recipe diff --git a/recipes/icons/tls_mag.png b/recipes/icons/tls_mag.png new file mode 100644 index 0000000000000000000000000000000000000000..de3e20b53107d775e74163b7841081be13affc4b GIT binary patch literal 234 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!63?wyl`GbK}V1Q4E>;M1%fy^&ozRsIBx3#sk zy1MGXf&J0Zk+HEcTefWZ^XHGBpRcpClbM;xt5+}Iy?d9Qo>o*;xMay<8yl-jmoDA7 zaedXQl@A^~m^EwW$&)Ah`}?1@R5<`m4Dobv43U_+_WX9S1_cr33m-C?STh&>``;dw zkf{FH_M||9$CPzopr07Erk>i_@% literal 0 HcmV?d00001 diff --git a/recipes/tls_mag.recipe b/recipes/tls_mag.recipe new file mode 100644 index 0000000000..274a70485a --- /dev/null +++ b/recipes/tls_mag.recipe @@ -0,0 +1,114 @@ +import json, re +from calibre import browser +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import BeautifulSoup + + +def re_html(y): + soup = BeautifulSoup(y.rstrip(), "html.parser") + return soup.text + +def get_cont(x): + url = x['url'] + title = x['headline'] + desc = x['standfirst'] + if x['byline']['text']: + desc = 'By ' + x['byline']['text'] + ' | ' + desc + print(' ', re_html(title), '\n\t', re_html(desc), '\n\t', url) + return ({ 'title': re_html(title), 'description': re_html(desc), 'url': url }) + +def get_id(url): + rq = browser().open(url) + return re.search('\?p=(\S+)>', str(rq.info())).group(1) + + +class tls(BasicNewsRecipe): + title = 'Times Literary Supplement' + description = ( + 'TLS, world’s leading journal for literature and ideas. Every week, we publish book reviews, book extracts, ' + 'essays and poems from leading writers from around the world. We cover far more than just literature, featuring ' + 'major articles on subjects from anthropology to zoology, philosophy to politics, comedy to psychology. Each week, ' + 'we also review the latest in fiction, film, opera, theatre, dance, radio and television.' + ) + encoding = 'utf-8' + language = 'en_GB' + masthead_url = 'https://www.the-tls.co.uk/wp-content/uploads/sites/7/2019/11/Smaller-Logo.jpg' + + extra_css = ''' + .label { font-size:small; color:#404040; } + .figc { font-size:small; text-align:center; } + .desc { font-style:italic; color:#202020; } + .auth { font-size:small; } + em, blockquote { color:#202020; } + .det { font-size:small; color:#202020; } + ''' + + def parse_index(self): + issue = 'https://www.the-tls.co.uk/issues/current-issue/' + url = 'https://www.the-tls.co.uk/wp-json/tls/v2/contents-page/' + get_id(issue) + raw = self.index_to_soup(url, raw=True) + data = json.loads(raw) + self.cover_url = data['featuredimage']['full_image'] + '?w600' + self.timefmt = ' [' + data['issuedateline']['issuedate'] + ']' + self.description = 'Issue ' + data['issuedateline']['issuenumber'] + + feeds = [] + + self.log('A note from the Editor') + feeds.append(('A note from the Editor', [get_cont(data['featuredarticle'])])) + + cont = data['contents'] + for c in cont: + section = re_html(cont[c]['articleheader']['title']) + self.log(section) + articles = [] + for arts in cont[c]['articleslist']: + articles.append(get_cont(arts)) + if articles: + feeds.append((section, articles)) + return feeds + + def print_version(self, url): + return 'https://www.the-tls.co.uk/wp-json/tls/v2/single-article/' + get_id(url) + + def preprocess_raw_html(self, raw, *a): + data = json.loads(raw) + prim = data['articleIntroPrimary'] + title = '

' + prim['headline'] + '

\n' + desc = '

' + prim['standfirst'] + '

\n' + + auth = lede = '' + + label = '
{}
\n' + if prim['label']['category']['text']: + label = label.format(prim['label']['articletype'] + ' | ' + prim['label']['category']['text']) + else: + label = label.format(prim['label']['articletype']) + + if prim['byline']['text']: + auth = '

'.format(prim['byline']['link']) + prim['byline']['text'] + '

\n' + + bks = '' + if data['bookdetails']: + for a in data['bookdetails']: + bks += '
' + for x, y in a.items(): + if isinstance(y, str): + if x == 'imageurl': + bks += ''.format(y) + elif y: + bks += '
' + y + '
\n' + bks += '
' + + if 'full_image' in data['leadimage'] and data['leadimage']['full_image']: + lede = '
{}
'.format( + data['leadimage']['full_image'] + '?w600', data['leadimage']['imagecaption'] + ' ' \ + + data['leadimage']['imagecredit'] + '' + ) + + body = data['content'] + + html = '
' \ + + label + title + desc + auth + lede + bks + body + \ + '
' + return BeautifulSoup(html).prettify() From 616c938f829758a8d92bd990f6365d6eaf39585d Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:56:52 +0530 Subject: [PATCH 3/4] ... --- recipes/tls_mag.recipe | 1 + 1 file changed, 1 insertion(+) diff --git a/recipes/tls_mag.recipe b/recipes/tls_mag.recipe index 274a70485a..d0c0d6a502 100644 --- a/recipes/tls_mag.recipe +++ b/recipes/tls_mag.recipe @@ -24,6 +24,7 @@ def get_id(url): class tls(BasicNewsRecipe): title = 'Times Literary Supplement' + __author__ = 'unkn0wn' description = ( 'TLS, world’s leading journal for literature and ideas. Every week, we publish book reviews, book extracts, ' 'essays and poems from leading writers from around the world. We cover far more than just literature, featuring ' From a9b085ef5eaa49ad0fad78e1bb510b45755c8f41 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Wed, 26 Jun 2024 18:57:47 +0530 Subject: [PATCH 4/4] Update tls_mag.recipe --- recipes/tls_mag.recipe | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/recipes/tls_mag.recipe b/recipes/tls_mag.recipe index d0c0d6a502..15dd30cd43 100644 --- a/recipes/tls_mag.recipe +++ b/recipes/tls_mag.recipe @@ -24,7 +24,7 @@ def get_id(url): class tls(BasicNewsRecipe): title = 'Times Literary Supplement' - __author__ = 'unkn0wn' + __author__ = 'unkn0wn' description = ( 'TLS, world’s leading journal for literature and ideas. Every week, we publish book reviews, book extracts, ' 'essays and poems from leading writers from around the world. We cover far more than just literature, featuring '