Update barrons.recipe

This commit is contained in:
unkn0w7n 2024-02-01 18:04:02 +05:30
parent e1956f3cff
commit ad85cc9734

View File

@ -1,4 +1,5 @@
from calibre.web.feeds.news import BasicNewsRecipe, classes, prefixed_classes
from collections import defaultdict
from datetime import date
import re
@ -24,17 +25,17 @@ class barrons(BasicNewsRecipe):
img {display:block; margin:0 auto;}
.figc { font-size:small; text-align:center; }
.imageCredit { color:#404040; font-size:x-small; }
.headline__category { font-size:small; color:#404040; }
.headline__category, .article-prebody { font-size:small; color:#404040; }
.sub-head { color:#202020; }
'''
keep_only_tags = [
classes('headline articleLead'),
classes('headline articleLead article-prebody'),
dict(name='section', attrs={'subscriptions-section':'content'})
]
remove_tags = [
dict(name=['meta', 'link', 'svg', 'button', 'i-amphtml-sizer']),
classes('wsj-ad dynamic-inset-overflow')
classes('wsj-ad dynamic-inset-overflow newsletter-inset')
]
def preprocess_html(self, soup):
@ -42,6 +43,9 @@ class barrons(BasicNewsRecipe):
figc['class'] = 'figc'
for p in figc.findAll('p'):
p.name = 'div'
for by in soup.findAll(**classes('byline')):
for p in by.findAll('p'):
p.name = 'span'
for h2 in soup.findAll('h2'):
h2.name = 'h4'
for iframe in soup.findAll('amp-iframe'):
@ -54,7 +58,11 @@ class barrons(BasicNewsRecipe):
iframe['src'] = 'https://datawrapper.dwcdn.net/' + data.group(1) + '/full.png'
for amp in soup.findAll('amp-img'):
if not amp.find('img', attrs={'src':True}):
if amp.has_attr('src'):
amp['src'] = amp['src'] + '&pixel_ratio=1.5'
amp.name = 'img'
else:
amp.img['src'] = amp.img['src'] + '&pixel_ratio=1.5'
return soup
def get_browser(self, *args, **kwargs):
@ -73,9 +81,15 @@ class barrons(BasicNewsRecipe):
self.log(self.timefmt)
self.cover_url = issue.img['src'].split('?')[0]
ans = []
ans = defaultdict(list)
for articles in archive.findAll(**prefixed_classes('BarronsTheme--story--')):
section = 'Magazine'
strap = articles.find_previous_sibling(**prefixed_classes('BarronsTheme--strap--'))
if strap:
label = strap.find(**prefixed_classes('BarronsTheme--label--'))
if label:
section = self.tag_to_string(label).strip()
a = articles.find(**prefixed_classes('BarronsTheme--heading'))
title = self.tag_to_string(a).strip()
url = a.a['href']
@ -90,8 +104,8 @@ class barrons(BasicNewsRecipe):
if summ:
desc += ' | ' + self.tag_to_string(summ)
self.log('\t', title, ' ', url, '\n\t', desc)
ans.append({'title': title, 'url': url, 'description': desc})
return [('Articles', ans)]
ans[section].append({'title': title, 'url': url, 'description': desc})
return [(section, articles) for section, articles in ans.items()]
def print_version(self, url):
    """Return the AMP variant of an article URL.

    Everything from the first ``?`` onwards (the query string) is dropped,
    then the ``/articles/`` path segment is rewritten to ``/amp/articles/``.

    A URL that already contains ``/amp/articles/`` is returned with only its
    query string removed, so calling this on an AMP link does not produce a
    doubled ``/amp/amp/articles/`` path.
    """
    base = url.split('?')[0]
    if '/amp/articles/' in base:
        # Already an AMP link: a blind replace would match the trailing
        # '/articles/' again and yield '/amp/amp/articles/'.
        return base
    return base.replace('/articles/', '/amp/articles/')