From 2a15d7fa57af69091302ce4c80a82f5411d1d8d7 Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Mon, 30 Sep 2024 10:03:22 +0530
Subject: [PATCH 1/2] update science journal

---
 recipes/science_advances.recipe | 3 +--
 recipes/science_journal.recipe  | 3 +--
 recipes/sciimmunol.recipe       | 3 +--
 recipes/scirobotics.recipe      | 3 +--
 recipes/scisignaling.recipe     | 3 +--
 recipes/scistm.recipe           | 3 +--
 6 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/recipes/science_advances.recipe b/recipes/science_advances.recipe
index 689a1132ae..cd2a6818f0 100644
--- a/recipes/science_advances.recipe
+++ b/recipes/science_advances.recipe
@@ -94,8 +94,7 @@ class scienceadv(BasicNewsRecipe):
 
         feeds = []
 
-        div = soup.find('div', attrs={'class':'toc__body'})
-        for sec in div.findAll('section', **classes('toc__section')):
+        for sec in soup.findAll('section', **classes('toc__section')):
             name = sec.find(**classes('sidebar-article-title--decorated'))
             section = self.tag_to_string(name).strip()
             self.log(section)
diff --git a/recipes/science_journal.recipe b/recipes/science_journal.recipe
index c3051af82c..0317146c76 100644
--- a/recipes/science_journal.recipe
+++ b/recipes/science_journal.recipe
@@ -92,8 +92,7 @@ class science(BasicNewsRecipe):
 
         feeds = []
 
-        div = soup.find('div', attrs={'class':'toc__body'})
-        for sec in div.findAll('section', **classes('toc__section')):
+        for sec in soup.findAll('section', **classes('toc__section')):
             name = sec.find(**classes('sidebar-article-title--decorated'))
             section = self.tag_to_string(name).strip()
             self.log(section)
diff --git a/recipes/sciimmunol.recipe b/recipes/sciimmunol.recipe
index 77b5802972..21d79fd2d8 100644
--- a/recipes/sciimmunol.recipe
+++ b/recipes/sciimmunol.recipe
@@ -94,8 +94,7 @@ class scienceadv(BasicNewsRecipe):
 
         feeds = []
 
-        div = soup.find('div', attrs={'class':'toc__body'})
-        for sec in div.findAll('section', **classes('toc__section')):
+        for sec in soup.findAll('section', **classes('toc__section')):
             name = sec.find(**classes('sidebar-article-title--decorated'))
             section = self.tag_to_string(name).strip()
             self.log(section)
diff --git a/recipes/scirobotics.recipe b/recipes/scirobotics.recipe
index 36e914d409..45a4df16da 100644
--- a/recipes/scirobotics.recipe
+++ b/recipes/scirobotics.recipe
@@ -94,8 +94,7 @@ class scienceadv(BasicNewsRecipe):
 
         feeds = []
 
-        div = soup.find('div', attrs={'class':'toc__body'})
-        for sec in div.findAll('section', **classes('toc__section')):
+        for sec in soup.findAll('section', **classes('toc__section')):
             name = sec.find(**classes('sidebar-article-title--decorated'))
             section = self.tag_to_string(name).strip()
             self.log(section)
diff --git a/recipes/scisignaling.recipe b/recipes/scisignaling.recipe
index ed8bcdf151..18769af889 100644
--- a/recipes/scisignaling.recipe
+++ b/recipes/scisignaling.recipe
@@ -93,8 +93,7 @@ class scienceadv(BasicNewsRecipe):
 
         feeds = []
 
-        div = soup.find('div', attrs={'class':'toc__body'})
-        for sec in div.findAll('section', **classes('toc__section')):
+        for sec in soup.findAll('section', **classes('toc__section')):
             name = sec.find(**classes('sidebar-article-title--decorated'))
             section = self.tag_to_string(name).strip()
             self.log(section)
diff --git a/recipes/scistm.recipe b/recipes/scistm.recipe
index 42b85c3719..101b60d7c4 100644
--- a/recipes/scistm.recipe
+++ b/recipes/scistm.recipe
@@ -94,8 +94,7 @@ class scienceadv(BasicNewsRecipe):
 
         feeds = []
 
-        div = soup.find('div', attrs={'class':'toc__body'})
-        for sec in div.findAll('section', **classes('toc__section')):
+        for sec in soup.findAll('section', **classes('toc__section')):
             name = sec.find(**classes('sidebar-article-title--decorated'))
             section = self.tag_to_string(name).strip()
             self.log(section)

From 812cf96bc53bfc3edab76ffdd28fffe0b41a3f9a Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Mon, 30 Sep 2024 10:05:02 +0530
Subject: [PATCH 2/2] nytimes: simplify parse_cnt/parse_types block handling in recipe and site parser

---
 recipes/nytfeeds.recipe                 | 32 ++++++++--------------
 src/calibre/web/site_parsers/nytimes.py | 36 +++++++++----------------
 2 files changed, 24 insertions(+), 44 deletions(-)

diff --git a/recipes/nytfeeds.recipe b/recipes/nytfeeds.recipe
index 0ac1ed5fcd..6214051f19 100644
--- a/recipes/nytfeeds.recipe
+++ b/recipes/nytfeeds.recipe
@@ -109,11 +109,12 @@ def parse_cnt(cnt):
                 yield ''.join(parse_fmt_type(cnt))
             else:
                 for cnt_ in cnt[k]:
-                    yield from parse_types(cnt_)
+                    yield ''.join(parse_types(cnt_))
         if isinstance(cnt[k], dict):
-            yield from parse_types(cnt[k])
-    if cnt.get('text') and 'formats' not in cnt:
-        yield cnt['text']
+            yield ''.join(parse_types(cnt[k]))
+    if cnt.get('text') and 'formats' not in cnt and 'content' not in cnt:
+        if isinstance(cnt['text'], str):
+            yield cnt['text']
 
 def parse_types(x):
     typename = x.get('__typename', '')
@@ -141,9 +142,6 @@ def parse_types(x):
     elif typename == 'RuleBlock':
         yield '<hr/>'
 
-    elif typename in {'ImageBlock', 'VideoBlock', 'InteractiveBlock'}:
-        yield "".join(parse_types(x['media']))
-
     elif typename == 'Image':
         yield "".join(parse_image(x))
 
@@ -161,23 +159,15 @@ def parse_types(x):
     elif typename == 'ListItemBlock':
         yield f'<li>{"".join(parse_cnt(x))}</li>'
 
-    elif typename == 'CapsuleBlock':
-        if x['capsuleContent'].get('body'):
-            yield "".join(parse_cnt(x['capsuleContent']['body']))
-    elif typename == 'Capsule':
-        yield "".join(parse_cnt(x['body']))
-
-    elif typename in {
-        'TextInline', 'TextOnlyDocumentBlock', 'DocumentBlock', 
-        'SummaryBlock', 'VisualStackBlock'
-    }:
+    elif typename == 'TextInline':
         yield "".join(parse_cnt(x))
 
+    elif typename in {'DetailBlock', 'TextRunKV'}:
+        yield f'<p><i>{"".join(parse_cnt(x))}</i></p>'
+
     elif typename and typename not in {'RelatedLinksBlock', 'Dropzone'}:
-        if x.get('media'):
-            yield "".join(parse_types(x['media']))
-        elif "".join(parse_cnt(x)).strip():
-            yield f'<p><i>{"".join(parse_cnt(x))}</i></p>'
+        if "".join(parse_cnt(x)).strip():
+            yield "".join(parse_cnt(x))
 
 def article_parse(data):
     yield "<html><body>"
diff --git a/src/calibre/web/site_parsers/nytimes.py b/src/calibre/web/site_parsers/nytimes.py
index b82f7a124b..eeefb3c51c 100644
--- a/src/calibre/web/site_parsers/nytimes.py
+++ b/src/calibre/web/site_parsers/nytimes.py
@@ -9,7 +9,7 @@ from xml.sax.saxutils import escape, quoteattr
 
 from calibre.utils.iso8601 import parse_iso8601
 
-module_version = 7  # needed for live updates
+module_version = 8  # needed for live updates
 pprint
 
 
@@ -111,11 +111,12 @@ def parse_cnt(cnt):
                 yield ''.join(parse_fmt_type(cnt))
             else:
                 for cnt_ in cnt[k]:
-                    yield from parse_types(cnt_)
+                    yield ''.join(parse_types(cnt_))
         if isinstance(cnt[k], dict):
-            yield from parse_types(cnt[k])
-    if cnt.get('text') and 'formats' not in cnt:
-        yield cnt['text']
+            yield ''.join(parse_types(cnt[k]))
+    if cnt.get('text') and 'formats' not in cnt and 'content' not in cnt:
+        if isinstance(cnt['text'], str):
+            yield cnt['text']
 
 def parse_types(x):
     typename = x.get('__typename', '')
@@ -143,9 +144,6 @@ def parse_types(x):
     elif typename == 'RuleBlock':
         yield '<hr/>'
 
-    elif typename in {'ImageBlock', 'VideoBlock', 'InteractiveBlock'}:
-        yield "".join(parse_types(x['media']))
-
     elif typename == 'Image':
         yield "".join(parse_image(x))
 
@@ -161,25 +159,17 @@ def parse_types(x):
     elif typename == 'ListBlock':
         yield f'<ul>{"".join(parse_cnt(x))}</ul>'
     elif typename == 'ListItemBlock':
-        yield f'<li>{"".join(parse_cnt(x))}</li>'
+        yield f'\n<li>{"".join(parse_cnt(x))}</li>'
 
-    elif typename == 'CapsuleBlock':
-        if x['capsuleContent'].get('body'):
-            yield "".join(parse_cnt(x['capsuleContent']['body']))
-    elif typename == 'Capsule':
-        yield "".join(parse_cnt(x['body']))
-
-    elif typename in {
-        'TextInline', 'TextOnlyDocumentBlock', 'DocumentBlock', 
-        'SummaryBlock', 'VisualStackBlock'
-    }:
+    elif typename == 'TextInline':
         yield "".join(parse_cnt(x))
 
+    elif typename in {'DetailBlock', 'TextRunKV'}:
+        yield f'<p><i>{"".join(parse_cnt(x))}</i></p>'
+
     elif typename and typename not in {'RelatedLinksBlock', 'Dropzone'}:
-        if x.get('media'):
-            yield "".join(parse_types(x['media']))
-        elif "".join(parse_cnt(x)).strip():
-            yield f'<p><i>{"".join(parse_cnt(x))}</i></p>'
+        if "".join(parse_cnt(x)).strip():
+            yield "".join(parse_cnt(x))
 
 def article_parse(data):
     yield "<html><body>"