From d9059b3899fd8f07ce1830989051ef7726fd6d88 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 23 Oct 2023 12:49:09 +0530 Subject: [PATCH] ... --- recipes/wsj.recipe | 2 +- recipes/wsj_free.recipe | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/recipes/wsj.recipe b/recipes/wsj.recipe index 7d5f4ecb62..00a0a5b4e3 100644 --- a/recipes/wsj.recipe +++ b/recipes/wsj.recipe @@ -67,7 +67,7 @@ class WSJ(BasicNewsRecipe): dict(attrs={'data-type':'inset'}), dict(attrs={'data-spotim-app':'conversation'}), dict(attrs={'data-spot-im-class':['message-text', 'conversation-root']}), - dict(attrs={'id':lambda x: x and x.startswith(('comments_sector', wrapper-INLINE', 'audio-tag-inner-audio-'))}), + dict(attrs={'id':lambda x: x and x.startswith(('comments_sector', 'wrapper-INLINE', 'audio-tag-inner-audio-'))}), ] articles_are_obfuscated = True diff --git a/recipes/wsj_free.recipe b/recipes/wsj_free.recipe index 82a9d3dac6..d6a123e2e8 100644 --- a/recipes/wsj_free.recipe +++ b/recipes/wsj_free.recipe @@ -54,7 +54,7 @@ class WSJ(BasicNewsRecipe): #big-top-caption { font-size:small; text-align:center; } [data-type:"tagline"] { font-style:italic; color:#202020; } ''' - + keep_only_tags = [ dict(name=['h1', 'h2']), dict(attrs={'aria-describedby':'big-top-caption'}), @@ -68,7 +68,7 @@ class WSJ(BasicNewsRecipe): dict(attrs={'data-type':'inset'}), dict(attrs={'data-spotim-app':'conversation'}), dict(attrs={'data-spot-im-class':['message-text', 'conversation-root']}), - dict(attrs={'id':lambda x: x and x.startswith(('comments_sector', wrapper-INLINE', 'audio-tag-inner-audio-'))}), + dict(attrs={'id':lambda x: x and x.startswith(('comments_sector', 'wrapper-INLINE', 'audio-tag-inner-audio-'))}), ] articles_are_obfuscated = True @@ -85,7 +85,7 @@ class WSJ(BasicNewsRecipe): pt.close() return pt.name - def preprocess_html(self, soup): + def preprocess_html(self, soup): for img in soup.findAll('img', attrs={'old-src':True}): img['src'] = img['old-src'] for p in soup.findAll('div', attrs={'data-type':['paragraph', 'image']}):