diff --git a/recipes/nytimes_sub.recipe b/recipes/nytimes_sub.recipe index c26a32f66e..85a282e6e2 100644 --- a/recipes/nytimes_sub.recipe +++ b/recipes/nytimes_sub.recipe @@ -79,19 +79,19 @@ class NewYorkTimes(BasicNewsRecipe): no_stylesheets = True compress_news_images = True compress_news_images_auto_size = 5 - preprocess_regexps = [(re.compile(r'(?s)'), lambda m: '')] remove_tags = [ dict(attrs={'aria-label':'tools'.split()}), dict(attrs={'data-videoid':True}), dict(name='button meta link'.split()), dict(id=lambda x: x and x.startswith('story-ad-')), + dict(name='head'), dict(name='a', href=lambda x: x and '#story-continues-' in x), dict(name='a', href=lambda x: x and '#whats-next' in x), dict(id=lambda x: x and 'sharetools-' in x), dict(id='newsletter-promo supported-by-ad bottom-wrapper'.split()), classes('story-print-citation supported-by accessibility-ad-header visually-hidden bottom-of-article ad'), - dict(attrs={'class': lambda x: x and ('SectionBar' in x or 'recirculation' in x or 'ResponsiveAd' in x)}), + dict(attrs={'class': lambda x: x and ('SectionBar' in x or 'recirculation' in x or 'ResponsiveAd' in x or 'accessibility-visuallyHidden' in x or 'RelatedCoverage' in x)}), ] def preprocess_html(self, soup):