From ae26837744d60cc4c6e7fa67f1a155faa41e08f2 Mon Sep 17 00:00:00 2001 From: LAntoine Date: Sat, 13 Jul 2024 17:25:01 +0200 Subject: [PATCH 1/2] Fix Mediapart * Fix login * Handle Google news cookies * Ignore screen-reader-only text --- recipes/mediapart.recipe | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/recipes/mediapart.recipe b/recipes/mediapart.recipe index 16d98506cc..38ed1ad1b4 100644 --- a/recipes/mediapart.recipe +++ b/recipes/mediapart.recipe @@ -43,7 +43,7 @@ class Mediapart(BasicNewsRecipe): ] remove_tags = [ - classes('action-links media--rich read-also login-subscribe print-source_url'), + classes('action-links media--rich read-also login-subscribe print-source_url screen-reader-only'), dict(name='svg'), ] @@ -92,17 +92,18 @@ class Mediapart(BasicNewsRecipe): def get_browser(self): # -- Handle login - - def is_form_login(form): - return "id" in form.attrs and form.attrs['id'] == "logFormEl" - br = BasicNewsRecipe.get_browser(self) if self.username is not None and self.password is not None: br.open('https://www.mediapart.fr/login') - br.select_form(predicate=is_form_login) - br['name'] = self.username + br.select_form(nr=0) + br['email'] = self.username br['password'] = self.password br.submit() + + # -- Handle Google's cookies consent page + br.open('https://news.google.com') + br.select_form(action="https://consent.google.com/save") + br.submit() return br def default_cover(self, cover_file): From 08608b189b3278375ddabad6335eb2133bb82cb9 Mon Sep 17 00:00:00 2001 From: LAntoine Date: Sat, 13 Jul 2024 17:25:46 +0200 Subject: [PATCH 2/2] Fix Bloomberg * Handle Google news cookies --- recipes/bloomberg.recipe | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/recipes/bloomberg.recipe b/recipes/bloomberg.recipe index 51c2eeaa78..9c62f4871b 100644 --- a/recipes/bloomberg.recipe +++ b/recipes/bloomberg.recipe @@ -207,3 +207,11 @@ class Bloomberg(BasicNewsRecipe): article.summary = self.tag_to_string(soup.find('div', attrs={'class':'subhead'})) article.text_summary = self.tag_to_string(soup.find('div', attrs={'class':'subhead'})) article.title = article.title.replace(' - Bloomberg', '') + + def get_browser(self): + # -- Handle Google's cookies consent page + br = BasicNewsRecipe.get_browser(self) + br.open('https://news.google.com') + br.select_form(action="https://consent.google.com/save") + br.submit() + return br