Ignore comments when getting text from serialized HTML

This commit is contained in:
Kovid Goyal 2019-11-03 09:58:35 +05:30
parent 47d3d16978
commit dedea474c8
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -401,7 +401,7 @@ def text_from_serialized_html(data):
for child in serialized_data.tree.c: for child in serialized_data.tree.c:
if child.n is 'body': if child.n is 'body':
stack.push(child) stack.push(child)
ignore_text = {'script':True, 'style':True} ignore_text = {'script':True, 'style':True, 'title': True}
while stack.length: while stack.length:
node = stack.pop() node = stack.pop()
if jstype(node) is 'string': if jstype(node) is 'string':
@@ -411,7 +411,7 @@ def text_from_serialized_html(data):
src = tag_map[node[0]] src = tag_map[node[0]]
else: else:
src = node src = node
if not ignore_text[src.n] and src.x: if src.n and not ignore_text[src.n] and src.x:
ans.push(src.x) ans.push(src.x)
if src.l: if src.l:
stack.push(src.l) stack.push(src.l)