Mirror of https://github.com/kovidgoyal/calibre.git (synced 2026-06-05 13:35:21 -04:00).
HTML 5 parser: drop xmlns:xml declarations on <html> and <body>
This commit is contained in:
@@ -399,6 +399,8 @@ class TreeBuilder(BaseTreeBuilder):
|
||||
except TypeError:
|
||||
pass
|
||||
except ValueError:
|
||||
if k == 'xmlns:xml':
|
||||
continue
|
||||
if k == 'xml:lang' and 'lang' not in html.attrib:
|
||||
k = 'lang'
|
||||
html.set(to_xml_name(k), v)
|
||||
@@ -414,6 +416,8 @@ class TreeBuilder(BaseTreeBuilder):
|
||||
except TypeError:
|
||||
pass
|
||||
except ValueError:
|
||||
if k == 'xmlns:xml':
|
||||
continue
|
||||
if k == 'xml:lang' and 'lang' not in body.attrib:
|
||||
k = 'lang'
|
||||
body.set(to_xml_name(k), v)
|
||||
|
||||
@@ -177,6 +177,9 @@ class ParsingTests(BaseTest):
|
||||
for i, (k, v) in enumerate(root.xpath('//*[local-name()="%s"]' % tag)[0].items()):
|
||||
self.assertEqual(i+1, int(v))
|
||||
|
||||
root = parse('<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en-US" xmlns:xml="http://www.w3.org/XML/1998/namespace"><body/></html>')
|
||||
self.assertNotIn('xmlnsU0003Axml', root.attrib, 'xml namespace declaration not removed')
|
||||
|
||||
def timing():
|
||||
import time, sys
|
||||
from calibre.ebooks.chardet import xml_to_unicode
|
||||
|
||||
Reference in New Issue
Block a user