Fix loading of comics from the Boston Globe

This commit is contained in:
Kovid Goyal 2021-07-13 22:22:01 +05:30
parent 4f9f6d6efc
commit 1c5464a024
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -118,6 +118,16 @@ class BostonGlobeSubscription(BasicNewsRecipe):
feeds.append(('Comics', articles))
return feeds
def preprocess_raw_html(self, raw_html, url):
    """Rewrite a comics page into a minimal headline-plus-image document.

    The page is treated as a comic when its ``description`` meta tag has
    content starting with ``'Comics: '``. For such pages the file name of
    the ``og:image`` URL is re-rooted onto the arcpublishing CloudFront
    host and a small HTML document holding only the page title and that
    image is returned. Every other page is returned unchanged.

    :param raw_html: HTML source of the downloaded page.
    :param url: URL the page was fetched from (not used here).
    :return: The replacement HTML for comics pages, otherwise ``raw_html``.
    """
    # Local import so this method does not rely on the file's import block.
    from html import escape

    soup = self.index_to_soup(raw_html)
    description = soup.find(attrs={'name': 'description'}, content=True)
    if description is None or not description['content'].startswith('Comics: '):
        return raw_html

    og_image = soup.find(property='og:image', content=True)
    if og_image is None:
        # No og:image means there is nothing to build the strip URL from;
        # leave the page alone instead of failing on a None subscript.
        return raw_html

    # Only the last path segment (the image file name) is kept.
    image_name = og_image['content'].split('/')[-1]
    if not image_name:
        # An empty or slash-terminated og:image would give a URL with no file.
        return raw_html
    img_url = 'https://cloudfront-us-east-1.images.arcpublishing.com/bostonglobe/' + image_name

    title = self.tag_to_string(soup.find('title'))
    # Escape both values so characters such as & < " in the title or URL
    # cannot break the generated markup.
    return '<html><body><h1 class="headline |">{}</h1><div class="image |"><img src="{}"></div></body></html>'.format(
        escape(title), escape(img_url, quote=True))
def preprocess_html(self, soup):
for img in soup.findAll('img'):
fs = img.get('data-src')