mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
HTML 5 parser: drop xmlns:xml declarations on <html> and <body>
This commit is contained in:
parent
31a02ba0f6
commit
a30db00a8e
@ -399,6 +399,8 @@ class TreeBuilder(BaseTreeBuilder):
|
||||
except TypeError:
|
||||
pass
|
||||
except ValueError:
|
||||
if k == 'xmlns:xml':
|
||||
continue
|
||||
if k == 'xml:lang' and 'lang' not in html.attrib:
|
||||
k = 'lang'
|
||||
html.set(to_xml_name(k), v)
|
||||
@ -414,6 +416,8 @@ class TreeBuilder(BaseTreeBuilder):
|
||||
except TypeError:
|
||||
pass
|
||||
except ValueError:
|
||||
if k == 'xmlns:xml':
|
||||
continue
|
||||
if k == 'xml:lang' and 'lang' not in body.attrib:
|
||||
k = 'lang'
|
||||
body.set(to_xml_name(k), v)
|
||||
|
@ -177,6 +177,9 @@ class ParsingTests(BaseTest):
|
||||
for i, (k, v) in enumerate(root.xpath('//*[local-name()="%s"]' % tag)[0].items()):
|
||||
self.assertEqual(i+1, int(v))
|
||||
|
||||
root = parse('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US" xmlns:xml="http://www.w3.org/XML/1998/namespace"><body/></html>')
|
||||
self.assertNotIn('xmlnsU0003Axml', root.attrib, 'xml namespace declaration not removed')
|
||||
|
||||
def timing():
|
||||
import time, sys
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
|
Loading…
x
Reference in New Issue
Block a user