Fix #995553 (Add html injects bad meta http-equiv="Content-Type" tag?)

This commit is contained in:
Kovid Goyal 2012-05-06 23:21:38 +05:30
parent 6d0b2ec553
commit cc91807ffa

View File

@@ -361,9 +361,11 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
     # Remove any encoding-specifying <meta/> elements
     for meta in META_XP(data):
         meta.getparent().remove(meta)
-    etree.SubElement(head, XHTML('meta'),
-        attrib={'http-equiv': 'Content-Type',
-                'content': '%s; charset=utf-8' % XHTML_NS})
+    meta = etree.SubElement(head, XHTML('meta'),
+        attrib={'http-equiv': 'Content-Type'})
+    meta.set('content', 'text/html; charset=utf-8') # Ensure content is second
+                                                    # attribute
     # Ensure has a <body/>
     if not xpath(data, '/h:html/h:body'):
         body = xpath(data, '//h:body')