This commit is contained in:
unkn0w7n 2024-09-30 10:05:02 +05:30
parent 2a15d7fa57
commit 812cf96bc5
2 changed files with 24 additions and 44 deletions

View File

@@ -109,10 +109,11 @@ def parse_cnt(cnt):
yield ''.join(parse_fmt_type(cnt))
else:
for cnt_ in cnt[k]:
yield from parse_types(cnt_)
yield ''.join(parse_types(cnt_))
if isinstance(cnt[k], dict):
yield from parse_types(cnt[k])
if cnt.get('text') and 'formats' not in cnt:
yield ''.join(parse_types(cnt[k]))
if cnt.get('text') and 'formats' not in cnt and 'content' not in cnt:
if isinstance(cnt['text'], str):
yield cnt['text']
def parse_types(x):
@@ -141,9 +142,6 @@ def parse_types(x):
elif typename == 'RuleBlock':
yield '<hr/>'
elif typename in {'ImageBlock', 'VideoBlock', 'InteractiveBlock'}:
yield "".join(parse_types(x['media']))
elif typename == 'Image':
yield "".join(parse_image(x))
@@ -161,24 +159,16 @@ def parse_types(x):
elif typename == 'ListItemBlock':
yield f'<li>{"".join(parse_cnt(x))}</li>'
elif typename == 'CapsuleBlock':
if x['capsuleContent'].get('body'):
yield "".join(parse_cnt(x['capsuleContent']['body']))
elif typename == 'Capsule':
yield "".join(parse_cnt(x['body']))
elif typename in {
'TextInline', 'TextOnlyDocumentBlock', 'DocumentBlock',
'SummaryBlock', 'VisualStackBlock'
}:
elif typename == 'TextInline':
yield "".join(parse_cnt(x))
elif typename and typename not in {'RelatedLinksBlock', 'Dropzone'}:
if x.get('media'):
yield "".join(parse_types(x['media']))
elif "".join(parse_cnt(x)).strip():
elif typename in {'DetailBlock', 'TextRunKV'}:
yield f'<p><i>{"".join(parse_cnt(x))}</i></p>'
elif typename and typename not in {'RelatedLinksBlock', 'Dropzone'}:
if "".join(parse_cnt(x)).strip():
yield "".join(parse_cnt(x))
def article_parse(data):
yield "<html><body>"
for d in data:

View File

@@ -9,7 +9,7 @@ from xml.sax.saxutils import escape, quoteattr
from calibre.utils.iso8601 import parse_iso8601
module_version = 7 # needed for live updates
module_version = 8 # needed for live updates
pprint
@@ -111,10 +111,11 @@ def parse_cnt(cnt):
yield ''.join(parse_fmt_type(cnt))
else:
for cnt_ in cnt[k]:
yield from parse_types(cnt_)
yield ''.join(parse_types(cnt_))
if isinstance(cnt[k], dict):
yield from parse_types(cnt[k])
if cnt.get('text') and 'formats' not in cnt:
yield ''.join(parse_types(cnt[k]))
if cnt.get('text') and 'formats' not in cnt and 'content' not in cnt:
if isinstance(cnt['text'], str):
yield cnt['text']
def parse_types(x):
@@ -143,9 +144,6 @@ def parse_types(x):
elif typename == 'RuleBlock':
yield '<hr/>'
elif typename in {'ImageBlock', 'VideoBlock', 'InteractiveBlock'}:
yield "".join(parse_types(x['media']))
elif typename == 'Image':
yield "".join(parse_image(x))
@@ -161,26 +159,18 @@ def parse_types(x):
elif typename == 'ListBlock':
yield f'<ul>{"".join(parse_cnt(x))}</ul>'
elif typename == 'ListItemBlock':
yield f'<li>{"".join(parse_cnt(x))}</li>'
yield f'\n<li>{"".join(parse_cnt(x))}</li>'
elif typename == 'CapsuleBlock':
if x['capsuleContent'].get('body'):
yield "".join(parse_cnt(x['capsuleContent']['body']))
elif typename == 'Capsule':
yield "".join(parse_cnt(x['body']))
elif typename in {
'TextInline', 'TextOnlyDocumentBlock', 'DocumentBlock',
'SummaryBlock', 'VisualStackBlock'
}:
elif typename == 'TextInline':
yield "".join(parse_cnt(x))
elif typename and typename not in {'RelatedLinksBlock', 'Dropzone'}:
if x.get('media'):
yield "".join(parse_types(x['media']))
elif "".join(parse_cnt(x)).strip():
elif typename in {'DetailBlock', 'TextRunKV'}:
yield f'<p><i>{"".join(parse_cnt(x))}</i></p>'
elif typename and typename not in {'RelatedLinksBlock', 'Dropzone'}:
if "".join(parse_cnt(x)).strip():
yield "".join(parse_cnt(x))
def article_parse(data):
yield "<html><body>"
for d in data: