# Adapted from calibre commit 1c5464a0247933dc56dccb96cfb0fb08d2fa2760
# ("Fix loading of comics from boston globe", Kovid Goyal, 2021-07-13), which
# adds this method to BostonGlobeSubscription in recipes/boston.com.recipe,
# between the index-building method that ends in ``return feeds`` and
# ``preprocess_html``.
def preprocess_raw_html(self, raw_html, url):
    """Replace a Boston Globe comics page with a minimal title-plus-image page.

    The page is parsed with ``self.index_to_soup``. When its
    ``<meta name="description">`` content starts with ``'Comics: '``, the
    file name (last path segment) of the ``og:image`` URL is re-rooted onto
    the arcpublishing CDN prefix below, and the whole document is replaced
    by a small HTML page holding the page title and that image.

    Args:
        raw_html: Markup of the downloaded page.
        url: Address the page was fetched from (not used here).

    Returns:
        The replacement markup for comics pages; ``raw_html`` unchanged for
        every other page, and for comics pages that carry no ``og:image``
        meta tag.
    """
    # Function-scope import so this method does not depend on the recipe
    # file's top-level imports.
    from html import escape

    soup = self.index_to_soup(raw_html)

    description = soup.find(attrs={'name': 'description'}, content=True)
    if description is None or not description['content'].startswith('Comics: '):
        return raw_html

    image_meta = soup.find(property='og:image', content=True)
    if image_meta is None:
        # Without an og:image there is nothing to build the page from; keep
        # the original markup instead of failing on ``None['content']``.
        return raw_html

    # Only the file name of the advertised image is kept; it is served from
    # this fixed CDN prefix.
    image_name = image_meta['content'].split('/')[-1]
    img_url = (
        'https://cloudfront-us-east-1.images.arcpublishing.com/bostonglobe/'
        + image_name
    )
    title = self.tag_to_string(soup.find('title'))

    # NOTE(review): the markup of this template was stripped from the copy of
    # the patch under review (only one ``{}`` placeholder survived, so the
    # image URL was being dropped). The tags below are a reconstruction;
    # confirm against the upstream recipe.
    # Title and URL come from the scraped page, so escape them before
    # interpolating into HTML.
    return (
        '<html><body><h1>{}</h1><div><img src="{}"></div></body></html>'
    ).format(escape(title), escape(img_url, quote=True))