From 522b8cfb0a6a34a0a287ceabeecb5f33b7caac34 Mon Sep 17 00:00:00 2001 From: bobbysteel Date: Tue, 22 May 2018 14:11:01 +0100 Subject: [PATCH 1/2] revert regex and add filter to strip head --- recipes/nytimes.recipe | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe index c14892842b..cad2141798 100644 --- a/recipes/nytimes.recipe +++ b/recipes/nytimes.recipe @@ -86,12 +86,13 @@ class NewYorkTimes(BasicNewsRecipe): dict(attrs={'data-videoid':True}), dict(name='button meta link'.split()), dict(id=lambda x: x and x.startswith('story-ad-')), + dict(name='head'), dict(name='a', href=lambda x: x and '#story-continues-' in x), dict(name='a', href=lambda x: x and '#whats-next' in x), dict(id=lambda x: x and 'sharetools-' in x), dict(id='newsletter-promo supported-by-ad bottom-wrapper'.split()), classes('story-print-citation supported-by accessibility-ad-header visually-hidden bottom-of-article ad'), - dict(attrs={'class': lambda x: x and ('SectionBar' in x or 'recirculation' in x or 'ResponsiveAd' in x)}), + dict(attrs={'class': lambda x: x and ('SectionBar' in x or 'recirculation' in x or 'ResponsiveAd' in x or 'accessibility-visuallyHidden' in x or 'RelatedCoverage' in x)}), ] def preprocess_html(self, soup): From 59a956c017145c3766809ca2750623774d14e382 Mon Sep 17 00:00:00 2001 From: bobbysteel Date: Tue, 22 May 2018 14:42:49 +0100 Subject: [PATCH 2/2] Update nytimes.recipe --- recipes/nytimes.recipe | 1 - 1 file changed, 1 deletion(-) diff --git a/recipes/nytimes.recipe b/recipes/nytimes.recipe index cad2141798..2d8050bf23 100644 --- a/recipes/nytimes.recipe +++ b/recipes/nytimes.recipe @@ -79,7 +79,6 @@ class NewYorkTimes(BasicNewsRecipe): no_stylesheets = True compress_news_images = True compress_news_images_auto_size = 5 - preprocess_regexps = [(re.compile(r'(?s)'), lambda m: '')] remove_tags = [ dict(attrs={'aria-label':'tools'.split()}),