From f851d78e6a349f3235a32e53c69b880ef6fe0463 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 24 Feb 2023 21:21:16 +0530
Subject: [PATCH] Update New York Magazine

---
Reviewer notes (text between the "---" separator and the diff is ignored
by `git am`, so the commit itself is unchanged):

* keep_only_tags: the three separate selectors (headline/byline classes,
  id="main", itemprop="articleBody") are replaced by a single match on an
  <article> element whose class list contains the token "article".
* remove_tags: additionally removes elements with the classes
  newsletter-flex-text, comments-link, tags, related and secondary-area.
* preprocess_html: when more than one <div> carrying the class
  "lede-image-wrapper" is found, the second one (index 1) is extracted
  from the tree -- presumably a duplicate of the first lede image; confirm
  against a live article page.

 recipes/nymag.recipe | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/recipes/nymag.recipe b/recipes/nymag.recipe
index 2bc8166ddb..46050559af 100644
--- a/recipes/nymag.recipe
+++ b/recipes/nymag.recipe
@@ -24,12 +24,10 @@ class NewYorkMagazine(BasicNewsRecipe):
     remove_javascript = True
     encoding = 'utf-8'
     keep_only_tags = [
-        classes('lede-text headline-primary article-timestamp by-authors'),
-        dict(id='main'),
-        dict(itemprop='articleBody'),
+        dict(name='article', attrs={'class':lambda x: x and 'article' in x.split()})
     ]
     remove_tags = [
-        classes('related-stories start-discussion'),
+        classes('related-stories start-discussion newsletter-flex-text comments-link tags related secondary-area'),
         dict(id=['minibrowserbox', 'article-related', 'article-tools'])
     ]
     remove_attributes = ['srcset']
@@ -70,6 +68,9 @@ class NewYorkMagazine(BasicNewsRecipe):
         return feeds
 
     def preprocess_html(self, soup):
+        if lede := soup.findAll('div', attrs={'class':lambda x: x and 'lede-image-wrapper' in x.split()}):
+            if len(lede) > 1:
+                lede[1].extract()
         for img in soup.findAll('img', attrs={'data-src': True}):
             img['src'] = img['data-src']
         return soup