Conversion: wrap contents of inline <script> tags in CDATA sections

This commit is contained in:
Kovid Goyal 2016-03-04 14:19:24 +05:30
parent ba0bcb408f
commit 9d25d5ae50

View File

@@ -347,13 +347,17 @@ def xml2unicode(root, pretty_print=False):
def xml2text(elem): def xml2text(elem):
return etree.tostring(elem, method='text', encoding=unicode, with_tail=False) return etree.tostring(elem, method='text', encoding=unicode, with_tail=False)
def escape_cdata(root):
    """Wrap the text of inline <style> and <script> elements in CDATA.

    Walks every descendant of ``root`` that is a ``style`` or ``script``
    element in the XHTML namespace. When the element's text contains any
    of ``<``, ``>`` or ``&``, the text is replaced in place with an
    ``etree.CDATA`` wrapper so those characters are serialized literally
    rather than as entities.

    Elements whose text contains the CDATA terminator ``]]>`` are left
    untouched and fall back to the serializer's normal escaping.

    :param root: lxml element whose descendants are examined; modified
        in place.
    :return: None
    """
    pat = re.compile(r'[<>&]')
    for elem in root.iterdescendants('{%s}style' % XHTML_NS, '{%s}script' % XHTML_NS):
        text = elem.text
        if not text or pat.search(text) is None:
            # Nothing that would be entity-escaped: no wrapping needed.
            continue
        if ']]>' in text:
            # A CDATA section cannot contain its own terminator, and
            # etree.CDATA() raises ValueError for such input. Skip the
            # element instead of aborting serialization of the document.
            continue
        elem.text = etree.CDATA(text)
def serialize(data, media_type, pretty_print=False): def serialize(data, media_type, pretty_print=False):
if isinstance(data, etree._Element): if isinstance(data, etree._Element):
is_oeb_doc = media_type in OEB_DOCS is_oeb_doc = media_type in OEB_DOCS
if is_oeb_doc: if is_oeb_doc:
for style in data.iterfind('.//{http://www.w3.org/1999/xhtml}style'): escape_cdata(data)
if style.text and re.search(r'[<>&]', style.text) is not None:
style.text = etree.CDATA(style.text)
ans = xml2str(data, pretty_print=pretty_print) ans = xml2str(data, pretty_print=pretty_print)
if is_oeb_doc: if is_oeb_doc:
# Convert self closing div|span|a|video|audio|iframe|etc tags # Convert self closing div|span|a|video|audio|iframe|etc tags