From 614be0d04495d1d119d8dea6950d4faf24827748 Mon Sep 17 00:00:00 2001
From: hehonghui
Date: Fri, 27 Nov 2020 12:33:41 +0800
Subject: [PATCH] fixed bug : can't parse atlantic's article images

When data-srcset holds several comma-separated candidates, taking the
first whitespace-separated token of the whole attribute leaves a trailing
comma on the URL. Use the URL of the first candidate instead, and ignore
an empty attribute rather than raising IndexError.

---
Review note: the hunks below were edited by hand, so the post-image blob
ids on the "index" lines are stale; the pre-image ids are unchanged.

 recipes/atlantic.recipe     | 6 +++++-
 recipes/atlantic_com.recipe | 6 +++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/recipes/atlantic.recipe b/recipes/atlantic.recipe
index 1c17ee0d0d..dbdd5af53b 100644
--- a/recipes/atlantic.recipe
+++ b/recipes/atlantic.recipe
@@ -87,7 +87,11 @@ class TheAtlantic(BasicNewsRecipe):
 
     def preprocess_html(self, soup):
         for img in soup.findAll('img', attrs={'data-srcset': True}):
-            img['src'] = img['data-srcset'].split()[0]
+            # data-srcset is a comma-separated list of "URL [descriptor]"
+            # candidates; use the URL of the first one and skip empty values.
+            first = img['data-srcset'].split(',')[0].split()
+            if first:
+                img['src'] = first[0]
         for img in soup.findAll('img', attrs={'data-src': True}):
             img['src'] = img['data-src']
         return soup
diff --git a/recipes/atlantic_com.recipe b/recipes/atlantic_com.recipe
index c224e13c49..d96dd85b39 100644
--- a/recipes/atlantic_com.recipe
+++ b/recipes/atlantic_com.recipe
@@ -87,7 +87,11 @@ class TheAtlantic(BasicNewsRecipe):
 
     def preprocess_html(self, soup):
         for img in soup.findAll('img', attrs={'data-srcset': True}):
-            img['src'] = img['data-srcset'].split()[0]
+            # data-srcset is a comma-separated list of "URL [descriptor]"
+            # candidates; use the URL of the first one and skip empty values.
+            first = img['data-srcset'].split(',')[0].split()
+            if first:
+                img['src'] = first[0]
         for img in soup.findAll('img', attrs={'data-src': True}):
             img['src'] = img['data-src']
         return soup