diff --git a/recipes/nymag.recipe b/recipes/nymag.recipe index 2bc8166ddb..46050559af 100644 --- a/recipes/nymag.recipe +++ b/recipes/nymag.recipe @@ -24,12 +24,10 @@ class NewYorkMagazine(BasicNewsRecipe): remove_javascript = True encoding = 'utf-8' keep_only_tags = [ - classes('lede-text headline-primary article-timestamp by-authors'), - dict(id='main'), - dict(itemprop='articleBody'), + dict(name='article', attrs={'class':lambda x: x and 'article' in x.split()}) ] remove_tags = [ - classes('related-stories start-discussion'), + classes('related-stories start-discussion newsletter-flex-text comments-link tags related secondary-area'), dict(id=['minibrowserbox', 'article-related', 'article-tools']) ] remove_attributes = ['srcset'] @@ -70,6 +68,9 @@ class NewYorkMagazine(BasicNewsRecipe): return feeds def preprocess_html(self, soup): + if lede := soup.findAll('div', attrs={'class':lambda x: x and 'lede-image-wrapper' in x.split()}): + if len(lede) > 1: + lede[1].extract() for img in soup.findAll('img', attrs={'data-src': True}): img['src'] = img['data-src'] return soup