From 7b551046d2b51761cece2ff32d6f5bb2bb753fce Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Fri, 25 Jul 2025 00:12:40 +0530 Subject: [PATCH] update wapo Fix order of HTML parsing in preprocess_raw_html --- recipes/wash_post.recipe | 2 +- recipes/wash_post_print.recipe | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/recipes/wash_post.recipe b/recipes/wash_post.recipe index 09e06c1177..b37a3d83cc 100644 --- a/recipes/wash_post.recipe +++ b/recipes/wash_post.recipe @@ -94,11 +94,11 @@ class TheWashingtonPost(BasicNewsRecipe): ] def preprocess_raw_html(self, raw, url): + root = parse(raw) if '/interactive/' in url: return ('

' + root.xpath('//h1')[0].text + '

' 'This article is supposed to be read in a browser.' '
') - root = parse(raw) m = root.xpath('//script[@id="__NEXT_DATA__"]') data = json.loads(m[0].text) diff --git a/recipes/wash_post_print.recipe b/recipes/wash_post_print.recipe index 1e7a5aa9cd..1d9cf58074 100644 --- a/recipes/wash_post_print.recipe +++ b/recipes/wash_post_print.recipe @@ -81,11 +81,11 @@ class wapoprint(BasicNewsRecipe): return feeds def preprocess_raw_html(self, raw, url): + root = parse(raw) if '/interactive/' in url: return ('

' + root.xpath('//h1')[0].text + '

' 'This article is supposed to be read in a browser.' '
') - root = parse(raw) m = root.xpath('//script[@id="__NEXT_DATA__"]') data = json.loads(m[0].text)