From 62bf0ab006f78ecc4a7fc5387c110decd817ec98 Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Thu, 19 Sep 2024 09:18:58 +0530 Subject: [PATCH] Update SciAm --- recipes/nytfeeds.recipe | 10 ++++++---- recipes/scientific_american.recipe | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/recipes/nytfeeds.recipe b/recipes/nytfeeds.recipe index 5e09f0a5b6..c803ae7ec8 100644 --- a/recipes/nytfeeds.recipe +++ b/recipes/nytfeeds.recipe @@ -33,9 +33,11 @@ def parse_img_grid(g): def parse_byline(byl): for b in byl.get('bylines', {}): yield '
' + b['renderedRepresentation'] + '
' + yield '
' for rl in byl.get('role', {}): if ''.join(parse_cnt(rl)).strip(): - yield '
' + ''.join(parse_cnt(rl)) + '
' + yield ''.join(parse_cnt(rl)) + yield '
' def iso_date(x): dt = datetime.fromisoformat(x[:-1]) + timedelta(seconds=time.timezone) @@ -47,7 +49,7 @@ def parse_header(h): if h.get('headline'): yield ''.join(parse_types(h['headline'])) if h.get('summary'): - yield '

' + ''.join(parse_types(h['summary'])) + '

' + yield '

' + ''.join(parse_types(h['summary'])) + '

' if h.get('ledeMedia'): yield ''.join(parse_types(h['ledeMedia'])) if h.get('byline'): @@ -201,7 +203,6 @@ class nytFeeds(BasicNewsRecipe): .byl, .time { font-size:small; color:#202020; } .cap { font-size:small; text-align:center; } .cred { font-style:italic; font-size:small; } - .sub { font-style:italic; } em, blockquote { color: #202020; } .sc { font-variant: small-caps; } .lbl { font-size:small; color:#404040; } @@ -263,5 +264,6 @@ class nytFeeds(BasicNewsRecipe): def get_article_url(self, article): url = BasicNewsRecipe.get_article_url(self, article) - if not re.search(r'/video/|live|/athletic/', url): + # you can remove '|/espanol/' from code below to include spanish articles. + if not re.search(r'/video/|live|/athletic/|/espanol/', url): return url diff --git a/recipes/scientific_american.recipe b/recipes/scientific_american.recipe index a5bbd7d609..10ffc5bb1d 100644 --- a/recipes/scientific_american.recipe +++ b/recipes/scientific_american.recipe @@ -36,7 +36,7 @@ class ScientificAmerican(BasicNewsRecipe): keep_only_tags = [ prefixed_classes( - 'article_hed- article_dek- article_authors- lead_image- article__content- bio-' + 'article_hed- article_dek- article_authors- lead_image- article__body- bio-' ), ] remove_tags = [ @@ -45,7 +45,7 @@ class ScientificAmerican(BasicNewsRecipe): def preprocess_html(self, soup): for fig in soup.findAll('figcaption'): - for p in fig.findAll('p'): + for p in fig.findAll(['p', 'div']): p.name = 'span' return soup