From a30db00a8e70d994da2d901bdc2fba58130efd14 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 11 Dec 2013 09:56:11 +0530 Subject: [PATCH] HTML 5 parser: drop xmlns:xml declarations on <html> and <body> --- src/calibre/ebooks/oeb/polish/parsing.py | 4 ++++ src/calibre/ebooks/oeb/polish/tests/parsing.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/src/calibre/ebooks/oeb/polish/parsing.py b/src/calibre/ebooks/oeb/polish/parsing.py index dee017145a..47dac638bf 100644 --- a/src/calibre/ebooks/oeb/polish/parsing.py +++ b/src/calibre/ebooks/oeb/polish/parsing.py @@ -399,6 +399,8 @@ class TreeBuilder(BaseTreeBuilder): except TypeError: pass except ValueError: + if k == 'xmlns:xml': + continue if k == 'xml:lang' and 'lang' not in html.attrib: k = 'lang' html.set(to_xml_name(k), v) @@ -414,6 +416,8 @@ class TreeBuilder(BaseTreeBuilder): except TypeError: pass except ValueError: + if k == 'xmlns:xml': + continue if k == 'xml:lang' and 'lang' not in body.attrib: k = 'lang' body.set(to_xml_name(k), v) diff --git a/src/calibre/ebooks/oeb/polish/tests/parsing.py b/src/calibre/ebooks/oeb/polish/tests/parsing.py index efea117c19..0344a58ad4 100644 --- a/src/calibre/ebooks/oeb/polish/tests/parsing.py +++ b/src/calibre/ebooks/oeb/polish/tests/parsing.py @@ -177,6 +177,9 @@ class ParsingTests(BaseTest): for i, (k, v) in enumerate(root.xpath('//*[local-name()="%s"]' % tag)[0].items()): self.assertEqual(i+1, int(v)) + root = parse('') + self.assertNotIn('xmlnsU0003Axml', root.attrib, 'xml namespace declaration not removed') + def timing(): import time, sys from calibre.ebooks.chardet import xml_to_unicode