# Patch for recipes/nytimesbook.recipe (class NewYorkTimesBookReview).
#
# keep_only_tags: the itemprop- and class-based selectors are replaced by a
#   single selector matching the element with id="story".
# remove_tags: the three old entries are replaced by a longer list that
#   matches elements by aria-label, href, aria-hidden, data-videoid, tag
#   name, id (exact values and prefix/substring predicates), role, and
#   class (exact names via classes() and substring predicates via a lambda).
#
# NOTE(review): the line breaks and the indentation inside the hunk were
# reconstructed from a whitespace-collapsed copy; the line split matches the
# hunk header counts (13 old / 25 new), but confirm the indentation against
# the target file before applying.
diff --git a/recipes/nytimesbook.recipe b/recipes/nytimesbook.recipe
index cc886a7521..2112226b31 100644
--- a/recipes/nytimesbook.recipe
+++ b/recipes/nytimesbook.recipe
@@ -18,13 +18,25 @@ class NewYorkTimesBookReview(BasicNewsRecipe):
     encoding = 'utf-8'
 
     keep_only_tags = [
-        dict(itemprop=['headline', 'author', 'associatedMedia', 'articleBody', 'reviewBody']),
-        classes('story-body StoryBodyCompanionColumn'),
+        dict(id='story'),
     ]
     remove_tags = [
-        dict(id=['d-promo-realestate', 'books-update-email-promo']),
-        dict(style=lambda x: x and 'visibility: hidden' in x),
-        classes('skip-to-text-link story-meta-footer-sharetools story-footer-links'),
+        dict(attrs={'aria-label':'tools'.split()}),
+        dict(attrs={'aria-label': lambda x: x and 'New York Times Logo' in x}),
+        dict(href='#site-content #site-index'.split()),
+        dict(attrs={'aria-hidden':'true'}),
+        dict(attrs={'data-videoid':True}),
+        dict(name='button meta link'.split()),
+        dict(id=lambda x: x and x.startswith('story-ad-')),
+        dict(name='head'),
+        dict(role='toolbar'),
+        dict(name='a', href=lambda x: x and '#story-continues-' in x),
+        dict(name='a', href=lambda x: x and '#whats-next' in x),
+        dict(id=lambda x: x and 'sharetools-' in x),
+        dict(id='newsletter-promo supported-by-ad bottom-wrapper'.split()),
+        classes('story-print-citation supported-by accessibility-ad-header visually-hidden bottom-of-article ad nav-wrapper'),
+        dict(attrs={'class': lambda x: x and (
+            'SectionBar' in x or 'recirculation' in x or 'ResponsiveAd' in x or 'accessibility-visuallyHidden' in x or 'RelatedCoverage' in x)}),
     ]
 
     def parse_index(self):