This commit is contained in:
Kovid Goyal 2024-06-28 10:43:38 +05:30
commit 11e3d27768
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 20 additions and 12 deletions

View File

@@ -51,7 +51,7 @@ class Harpers(BasicNewsRecipe):
for img in soup.findAll('img', attrs={'srcset':True}): for img in soup.findAll('img', attrs={'srcset':True}):
for src in img['srcset'].split(','): for src in img['srcset'].split(','):
if '768w' in src: if '768w' in src:
img['src'] = img['src'].split()[0] img['src'] = src.split()[0]
return soup return soup
def parse_index(self): def parse_index(self):
@@ -67,12 +67,12 @@ class Harpers(BasicNewsRecipe):
for a in soup.findAll('a', attrs={'href':lambda x: x and x.startswith(url + '/')}): for a in soup.findAll('a', attrs={'href':lambda x: x and x.startswith(url + '/')}):
if not a.find('img') and a.find(['h1', 'h2', 'h3', 'h4']): if not a.find('img') and a.find(['h1', 'h2', 'h3', 'h4']):
url = a['href'] url = a['href']
title = self.tag_to_string(a) title = self.tag_to_string(a).strip()
desc = '' desc = ''
div = a.findParent('div').find('div', attrs={'class':'byline'}) div = a.findParent('div').find('div', attrs={'class':'byline'})
if div: if div:
desc = self.tag_to_string(div) desc = self.tag_to_string(div).strip()
self.log('\t', title, '\n\t', desc, '\n\t', url) self.log(' ', title, '\n\t', desc[:-1], '\n\t', url)
ans.append({'title': title, 'description': desc, 'url': url}) ans.append({'title': title, 'description': desc, 'url': url})
return [('Articles', ans)] return [('Articles', ans)]

View File

@@ -7,8 +7,9 @@ from calibre.web.feeds.news import BasicNewsRecipe
def re_html(y): def re_html(y):
soup = BeautifulSoup(y.rstrip(), "html.parser") if y:
return soup.text soup = BeautifulSoup(y.rstrip(), "html.parser")
return soup.text
def get_cont(x): def get_cont(x):
url = x['url'] url = x['url']
@@ -36,6 +37,7 @@ class tls(BasicNewsRecipe):
encoding = 'utf-8' encoding = 'utf-8'
language = 'en_GB' language = 'en_GB'
masthead_url = 'https://www.the-tls.co.uk/wp-content/uploads/sites/7/2019/11/Smaller-Logo.jpg' masthead_url = 'https://www.the-tls.co.uk/wp-content/uploads/sites/7/2019/11/Smaller-Logo.jpg'
remove_empty_feeds = True
extra_css = ''' extra_css = '''
.label { font-size:small; color:#404040; } .label { font-size:small; color:#404040; }
@@ -58,12 +60,15 @@ class tls(BasicNewsRecipe):
feeds = [] feeds = []
self.log('A note from the Editor') if data['featuredarticle']:
feeds.append(('A note from the Editor', [get_cont(data['featuredarticle'])])) self.log('A note from the Editor')
feeds.append(('A note from the Editor', [get_cont(data['featuredarticle'])]))
cont = data['contents'] cont = data['contents']
for c in cont: for c in cont:
section = re_html(cont[c]['articleheader']['title']) section = re_html(cont[c]['articleheader']['title'])
if not section:
continue
self.log(section) self.log(section)
articles = [] articles = []
for arts in cont[c]['articleslist']: for arts in cont[c]['articleslist']:
@@ -84,10 +89,13 @@ class tls(BasicNewsRecipe):
auth = lede = '' auth = lede = ''
label = '<div class="label">{}</div>\n' label = '<div class="label">{}</div>\n'
if prim['label']['category']['text']: l = prim['label']
label = label.format(prim['label']['articletype'] + ' | ' + prim['label']['category']['text']) if l['category']['text'] and l['articletype']:
else: label = label.format(l['articletype'] + ' | ' + l['category']['text'])
label = label.format(prim['label']['articletype']) elif l['articletype']:
label = label.format(l['articletype'])
elif l['category']['text']:
label = label.format(l['category']['text'])
if prim['byline']['text']: if prim['byline']['text']:
auth = '<p class="auth"><a href="{}">'.format(prim['byline']['link']) + prim['byline']['text'] + '</a></p>\n' auth = '<p class="auth"><a href="{}">'.format(prim['byline']['link']) + prim['byline']['text'] + '</a></p>\n'