Cover for Irish Times & other small changes.
This commit is contained in:
unkn0w7n 2023-05-17 23:55:41 +05:30
parent f9aa200d72
commit 5b0c506b07
5 changed files with 29 additions and 1 deletions

View File

@ -27,7 +27,7 @@ class TheHindu(BasicNewsRecipe):
# Extra CSS rules for this recipe, kept as a single triple-quoted string
# assigned to extra_css.
# NOTE(review): the two consecutive ".subhead, .subhead_lead" rules below look
# like the removed and the added side of a diff hunk; the second one (which
# also covers .bold) appears to be the intended rule -- confirm against the
# actual recipe file before relying on either.
extra_css = '''
.caption {font-size:small; text-align:center;}
.author, .dateLine {font-size:small; font-weight:bold;}
.subhead, .subhead_lead {font-weight:bold;}
.subhead, .subhead_lead, .bold {font-weight:bold;}
img {display:block; margin:0 auto;}
.italic {font-style:italic; color:#202020;}
'''

Binary file not shown.

Before

Width:  |  Height:  |  Size: 612 B

After

Width:  |  Height:  |  Size: 343 B

View File

@ -39,6 +39,23 @@ class IrishTimes(BasicNewsRecipe):
classes('sm-promo-headline top-table-list-container single-divider interstitial-link'),
]
remove_attributes = ['width', 'height']
def get_cover_url(self):
    """Return the URL of today's Irish Times front-page image, or None.

    The dated image on img.kiosko.net is tried first. If opening it fails,
    the kiosko.net index page for the paper is fetched and searched for the
    first ``<img>`` whose ``src`` ends in ``750.jpg``.

    Returns:
        The cover image URL as a string, or None when neither the dated
        image nor the index page yields a cover.
    """
    from datetime import date
    cover = 'https://img.kiosko.net/' + date.today().strftime('%Y/%m/%d') + '/ie/irish_times.750.jpg'
    br = BasicNewsRecipe.get_browser(self, verify_ssl_certificates=False)
    try:
        br.open(cover)
    except Exception:
        # Dated image could not be opened: fall back to the index page.
        index = 'https://en.kiosko.net/ie/np/irish_times.html'
        soup = self.index_to_soup(index)
        # find() returns a single tag or None. Looping over that result
        # would walk the (empty) children of the <img> tag and never reach
        # the return statements, so the tag is tested directly instead.
        image = soup.find('img', attrs={'src': lambda x: x and x.endswith('750.jpg')})
        if image is not None:
            src = image['src']
            # A src with a leading slash gets the https: scheme prepended.
            if src.startswith('/'):
                return 'https:' + src
            return src
        self.log("\nCover unavailable")
        cover = None
    return cover
def parse_index(self):
soup = self.index_to_soup('https://www.irishtimes.com/')

View File

@ -66,6 +66,15 @@ class IrishTimes(BasicNewsRecipe):
pt.close()
return pt.name
def __init__(self, *args, **kwargs):
    """Initialise the recipe and enable image compression for Kindle output.

    All arguments are forwarded unchanged to ``BasicNewsRecipe.__init__``.
    When the output profile's short name starts with ``kindle``, news image
    compression is switched on with an auto-size value of 5 and a warning
    is logged.
    """
    BasicNewsRecipe.__init__(self, *args, **kwargs)
    profile_name = self.output_profile.short_name
    if profile_name.startswith('kindle'):
        # Smaller images keep the generated file under Amazon's
        # email sending threshold.
        options = self.web2disk_options
        options.compress_news_images = True
        options.compress_news_images_auto_size = 5
        self.log.warn('Kindle Output profile being used, reducing image quality to keep file size below amazon email threshold')
feeds = []
sections = [

View File

@ -129,6 +129,8 @@ class LiveMint(BasicNewsRecipe):
return raw
def preprocess_html(self, soup):
for h2 in soup.find('h2'):
h2.name = 'p'
for span in soup.findAll('figcaption'):
span['id'] = 'img-cap'
for auth in soup.findAll('span', attrs={'class':['articleInfo pubtime','articleInfo author']}):