From 812cf96bc53bfc3edab76ffdd28fffe0b41a3f9a Mon Sep 17 00:00:00 2001 From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com> Date: Mon, 30 Sep 2024 10:05:02 +0530 Subject: [PATCH] ... --- recipes/nytfeeds.recipe | 32 ++++++++-------------- src/calibre/web/site_parsers/nytimes.py | 36 +++++++++---------------- 2 files changed, 24 insertions(+), 44 deletions(-) diff --git a/recipes/nytfeeds.recipe b/recipes/nytfeeds.recipe index 0ac1ed5fcd..6214051f19 100644 --- a/recipes/nytfeeds.recipe +++ b/recipes/nytfeeds.recipe @@ -109,11 +109,12 @@ def parse_cnt(cnt): yield ''.join(parse_fmt_type(cnt)) else: for cnt_ in cnt[k]: - yield from parse_types(cnt_) + yield ''.join(parse_types(cnt_)) if isinstance(cnt[k], dict): - yield from parse_types(cnt[k]) - if cnt.get('text') and 'formats' not in cnt: - yield cnt['text'] + yield ''.join(parse_types(cnt[k])) + if cnt.get('text') and 'formats' not in cnt and 'content' not in cnt: + if isinstance(cnt['text'], str): + yield cnt['text'] def parse_types(x): typename = x.get('__typename', '') @@ -141,9 +142,6 @@ def parse_types(x): elif typename == 'RuleBlock': yield '
{"".join(parse_cnt(x))}
' + elif typename and typename not in {'RelatedLinksBlock', 'Dropzone'}: - if x.get('media'): - yield "".join(parse_types(x['media'])) - elif "".join(parse_cnt(x)).strip(): - yield f'{"".join(parse_cnt(x))}
' + if "".join(parse_cnt(x)).strip(): + yield "".join(parse_cnt(x)) def article_parse(data): yield "" diff --git a/src/calibre/web/site_parsers/nytimes.py b/src/calibre/web/site_parsers/nytimes.py index b82f7a124b..eeefb3c51c 100644 --- a/src/calibre/web/site_parsers/nytimes.py +++ b/src/calibre/web/site_parsers/nytimes.py @@ -9,7 +9,7 @@ from xml.sax.saxutils import escape, quoteattr from calibre.utils.iso8601 import parse_iso8601 -module_version = 7 # needed for live updates +module_version = 8 # needed for live updates pprint @@ -111,11 +111,12 @@ def parse_cnt(cnt): yield ''.join(parse_fmt_type(cnt)) else: for cnt_ in cnt[k]: - yield from parse_types(cnt_) + yield ''.join(parse_types(cnt_)) if isinstance(cnt[k], dict): - yield from parse_types(cnt[k]) - if cnt.get('text') and 'formats' not in cnt: - yield cnt['text'] + yield ''.join(parse_types(cnt[k])) + if cnt.get('text') and 'formats' not in cnt and 'content' not in cnt: + if isinstance(cnt['text'], str): + yield cnt['text'] def parse_types(x): typename = x.get('__typename', '') @@ -143,9 +144,6 @@ def parse_types(x): elif typename == 'RuleBlock': yield '{"".join(parse_cnt(x))}
' + elif typename and typename not in {'RelatedLinksBlock', 'Dropzone'}: - if x.get('media'): - yield "".join(parse_types(x['media'])) - elif "".join(parse_cnt(x)).strip(): - yield f'{"".join(parse_cnt(x))}
' + if "".join(parse_cnt(x)).strip(): + yield "".join(parse_cnt(x)) def article_parse(data): yield ""