mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Times Literary Supplement by unkn0wn
Merge branch 'master' of https://github.com/unkn0w7n/calibre
This commit is contained in:
commit
26e67c96ed
BIN
recipes/icons/tls_mag.png
Normal file
BIN
recipes/icons/tls_mag.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 234 B |
115
recipes/tls_mag.recipe
Normal file
115
recipes/tls_mag.recipe
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
import json, re
|
||||||
|
from calibre import browser
|
||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
|
def re_html(y):
    """Strip HTML markup from a text snippet, returning plain text."""
    return BeautifulSoup(y.rstrip(), "html.parser").text
|
||||||
|
|
||||||
|
def get_cont(x):
    """Turn one TLS article record (a dict from the wp-json feed) into a
    calibre feed entry with plain-text 'title', 'description' and 'url'."""
    link = x['url']
    headline = re_html(x['headline'])
    standfirst = x['standfirst']
    byline = x['byline']['text']
    if byline:
        # Prepend the author credit to the standfirst when one is present.
        standfirst = 'By ' + byline + ' | ' + standfirst
    blurb = re_html(standfirst)
    print(' ', headline, '\n\t', blurb, '\n\t', link)
    return {'title': headline, 'description': blurb, 'url': link}
|
||||||
|
|
||||||
|
def get_id(url):
    """Resolve a TLS page URL to its WordPress post id.

    Opens the URL and scrapes a ``?p=<id>>`` token out of the response
    headers (presumably a shortlink/Link header — confirm against the site);
    the id is what the wp-json API endpoints are keyed on.

    Raises AttributeError if no such token appears in the headers.
    """
    rq = browser().open(url)
    # Raw string: in a plain literal '\?' is an invalid escape sequence
    # (SyntaxWarning on Python 3.12+). The pattern itself is unchanged.
    return re.search(r'\?p=(\S+)>', str(rq.info())).group(1)
|
||||||
|
|
||||||
|
|
||||||
|
class tls(BasicNewsRecipe):
    """Times Literary Supplement — built from the site's wp-json API rather
    than scraped HTML: the contents page and each article are fetched as JSON
    and rendered into simple HTML locally."""

    title = 'Times Literary Supplement'
    __author__ = 'unkn0wn'
    description = (
        'TLS, world’s leading journal for literature and ideas. Every week, we publish book reviews, book extracts, '
        'essays and poems from leading writers from around the world. We cover far more than just literature, featuring '
        'major articles on subjects from anthropology to zoology, philosophy to politics, comedy to psychology. Each week, '
        'we also review the latest in fiction, film, opera, theatre, dance, radio and television.'
    )
    encoding = 'utf-8'
    language = 'en_GB'
    masthead_url = 'https://www.the-tls.co.uk/wp-content/uploads/sites/7/2019/11/Smaller-Logo.jpg'

    extra_css = '''
        .label { font-size:small; color:#404040; }
        .figc { font-size:small; text-align:center; }
        .desc { font-style:italic; color:#202020; }
        .auth { font-size:small; }
        em, blockquote { color:#202020; }
        .det { font-size:small; color:#202020; }
    '''

    def parse_index(self):
        """Fetch the current issue's contents page as JSON and build the
        feed list: an editor's note first, then one feed per section."""
        issue = 'https://www.the-tls.co.uk/issues/current-issue/'
        # The contents endpoint is keyed on the WordPress post id of the issue.
        url = 'https://www.the-tls.co.uk/wp-json/tls/v2/contents-page/' + get_id(issue)
        raw = self.index_to_soup(url, raw=True)
        data = json.loads(raw)
        self.cover_url = data['featuredimage']['full_image'] + '?w600'
        self.timefmt = ' [' + data['issuedateline']['issuedate'] + ']'
        self.description = 'Issue ' + data['issuedateline']['issuenumber']

        feeds = []

        self.log('A note from the Editor')
        feeds.append(('A note from the Editor', [get_cont(data['featuredarticle'])]))

        cont = data['contents']
        for c in cont:
            section = re_html(cont[c]['articleheader']['title'])
            self.log(section)
            articles = []
            for arts in cont[c]['articleslist']:
                articles.append(get_cont(arts))
            if articles:
                feeds.append((section, articles))
        return feeds

    def print_version(self, url):
        """Map an article URL to its single-article JSON endpoint."""
        return 'https://www.the-tls.co.uk/wp-json/tls/v2/single-article/' + get_id(url)

    def preprocess_raw_html(self, raw, *a):
        """Render the single-article JSON payload (see print_version) into a
        minimal HTML page: label, headline, standfirst, byline, lead image,
        reviewed-book details, then the article body."""
        data = json.loads(raw)
        prim = data['articleIntroPrimary']
        title = '<h1>' + prim['headline'] + '</h1>\n'
        desc = '<p class="desc">' + prim['standfirst'] + '</p>\n'

        auth = lede = ''

        label = '<div class="label">{}</div>\n'
        if prim['label']['category']['text']:
            label = label.format(prim['label']['articletype'] + ' | ' + prim['label']['category']['text'])
        else:
            label = label.format(prim['label']['articletype'])

        if prim['byline']['text']:
            auth = '<p class="auth"><a href="{}">'.format(prim['byline']['link']) + prim['byline']['text'] + '</a></p>\n'

        # Books under review: render every non-empty string field, treating
        # 'imageurl' as the cover image.
        bks = ''
        if data['bookdetails']:
            for bk in data['bookdetails']:
                bks += '<br>'
                for key, val in bk.items():
                    if isinstance(val, str):
                        if key == 'imageurl':
                            bks += '<img src="{}">'.format(val)
                        elif val:
                            bks += '<div class="det">' + val + '</div>\n'
            bks += '<br>'

        if 'full_image' in data['leadimage'] and data['leadimage']['full_image']:
            lede = '<br><img src="{}"><div class="figc">{}</div>'.format(
                data['leadimage']['full_image'] + '?w600',
                data['leadimage']['imagecaption'] + ' <i>' + data['leadimage']['imagecredit'] + '</i>'
            )

        body = data['content']

        html = ('<html><body><div>'
                + label + title + desc + auth + lede + bks + body
                + '</div></body></html>')
        # Name the parser explicitly: a bare BeautifulSoup(html) warns and
        # picks whichever parser happens to be installed, so output could
        # vary between machines.
        return BeautifulSoup(html, 'html.parser').prettify()
|
@ -13,12 +13,16 @@ past_edition = None
|
|||||||
|
|
||||||
def media_bucket(x):
|
def media_bucket(x):
|
||||||
if x.get('type', '') == 'image':
|
if x.get('type', '') == 'image':
|
||||||
return '<img src="{}"><div class="figc">{}</div>\n'.format(
|
if x.get('subtype', '') == 'graphic':
|
||||||
|
return '<br><img src="{}"><div class="figc">{}</div>\n'.format(
|
||||||
x['manifest-url'], x['caption'] + '<i> ' + x['credit'] + '</i>'
|
x['manifest-url'], x['caption'] + '<i> ' + x['credit'] + '</i>'
|
||||||
)
|
)
|
||||||
|
return '<br><img src="{}"><div class="figc">{}</div>\n'.format(
|
||||||
|
x['manifest-url'].split('?')[0] + '?width=600', x['caption'] + '<i> ' + x['credit'] + '</i>'
|
||||||
|
)
|
||||||
if x.get('type', '') == 'video':
|
if x.get('type', '') == 'video':
|
||||||
return '<a href="{}"><img src="{}"></a><div class="figc">{}</div>\n'.format(
|
return '<br><a href="{}"><img src="{}"></a><div class="figc">{}</div>\n'.format(
|
||||||
x['share_link'], x['thumbnail_url'], x['caption'] + '<i> ' + x['credit'] + '</i>'
|
x['share_link'], x['thumbnail_url'].split('?')[0] + '?width=600', x['caption'] + '<i> ' + x['credit'] + '</i>'
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
@ -90,7 +94,6 @@ class WSJ(BasicNewsRecipe):
|
|||||||
m_itm = soup.findAll('panel', attrs={'class':'media-item'})
|
m_itm = soup.findAll('panel', attrs={'class':'media-item'})
|
||||||
if i_lst and m_itm:
|
if i_lst and m_itm:
|
||||||
for x, y in list(zip_longest(m_itm, i_lst)):
|
for x, y in list(zip_longest(m_itm, i_lst)):
|
||||||
x.name = 'p'
|
|
||||||
x.insert_after(BeautifulSoup(y, 'html.parser'))
|
x.insert_after(BeautifulSoup(y, 'html.parser'))
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
@ -141,9 +144,9 @@ class WSJ(BasicNewsRecipe):
|
|||||||
break
|
break
|
||||||
|
|
||||||
dt = datetime.fromisoformat(date[:-1]) + timedelta(seconds=time.timezone)
|
dt = datetime.fromisoformat(date[:-1]) + timedelta(seconds=time.timezone)
|
||||||
dt = dt.strftime('%b %d, %Y')
|
dt_ = dt.strftime('%b %d, %Y')
|
||||||
self.log('Downloading ', dt)
|
self.log('Downloading ', dt_)
|
||||||
self.timefmt = ' [' + dt + ']'
|
self.timefmt = ' [' + dt_ + ']'
|
||||||
|
|
||||||
feeds = []
|
feeds = []
|
||||||
|
|
||||||
@ -153,7 +156,7 @@ class WSJ(BasicNewsRecipe):
|
|||||||
if '-pages_' in k:
|
if '-pages_' in k:
|
||||||
section = k.split('-pages_')[0].replace('_', ' ')
|
section = k.split('-pages_')[0].replace('_', ' ')
|
||||||
if 'MAGAZINE' in section:
|
if 'MAGAZINE' in section:
|
||||||
if not datetime.now().strftime("%d") == 1:
|
if not dt.strftime('%d') == 1:
|
||||||
continue
|
continue
|
||||||
self.log('Loading Magazine section')
|
self.log('Loading Magazine section')
|
||||||
self.log(section)
|
self.log(section)
|
||||||
|
@ -9,16 +9,19 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes
|
|||||||
|
|
||||||
def media_bucket(x):
|
def media_bucket(x):
|
||||||
if x.get('type', '') == 'image':
|
if x.get('type', '') == 'image':
|
||||||
return '<img src="{}"><div class="figc">{}</div>\n'.format(
|
if x.get('subtype', '') == 'graphic':
|
||||||
|
return '<br><img src="{}"><div class="figc">{}</div>\n'.format(
|
||||||
x['manifest-url'], x['caption'] + '<i> ' + x['credit'] + '</i>'
|
x['manifest-url'], x['caption'] + '<i> ' + x['credit'] + '</i>'
|
||||||
)
|
)
|
||||||
|
return '<br><img src="{}"><div class="figc">{}</div>\n'.format(
|
||||||
|
x['manifest-url'].split('?')[0] + '?width=600', x['caption'] + '<i> ' + x['credit'] + '</i>'
|
||||||
|
)
|
||||||
if x.get('type', '') == 'video':
|
if x.get('type', '') == 'video':
|
||||||
return '<a href="{}"><img src="{}"></a><div class="figc">{}</div>\n'.format(
|
return '<br><a href="{}"><img src="{}"></a><div class="figc">{}</div>\n'.format(
|
||||||
x['share_link'], x['thumbnail_url'], x['caption'] + '<i> ' + x['credit'] + '</i>'
|
x['share_link'], x['thumbnail_url'].split('?')[0] + '?width=600', x['caption'] + '<i> ' + x['credit'] + '</i>'
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
class WSJ(BasicNewsRecipe):
|
class WSJ(BasicNewsRecipe):
|
||||||
title = 'WSJ. Magazine'
|
title = 'WSJ. Magazine'
|
||||||
__author__ = 'unkn0wn'
|
__author__ = 'unkn0wn'
|
||||||
@ -87,7 +90,6 @@ class WSJ(BasicNewsRecipe):
|
|||||||
m_itm = soup.findAll('panel', attrs={'class':'media-item'})
|
m_itm = soup.findAll('panel', attrs={'class':'media-item'})
|
||||||
if i_lst and m_itm:
|
if i_lst and m_itm:
|
||||||
for x, y in list(zip_longest(m_itm, i_lst)):
|
for x, y in list(zip_longest(m_itm, i_lst)):
|
||||||
x.name = 'p'
|
|
||||||
x.insert_after(BeautifulSoup(y, 'html.parser'))
|
x.insert_after(BeautifulSoup(y, 'html.parser'))
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user