From 59ecde1963935d563767acec7cb3f279782e0547 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Mon, 20 Oct 2008 10:38:55 -0700
Subject: [PATCH] Fix #1187 (Producing blank ePub files.)

---
 src/calibre/ebooks/html.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index 64168823d5..e10aa9a5b3 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -421,13 +421,23 @@ class Parser(PreProcessor, LoggingInterface):
     def save_path(self):
         return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
 
+    def declare_xhtml_namespace(self, match):
+        if not match.group('raw'):
+            return '<html xmlns="http://www.w3.org/1999/xhtml">'
+        raw = match.group('raw')
+        m = re.search(r'(?i)xmlns\s*=\s*[\'"](?P<uri>[^"\']*)[\'"]', raw)
+        if not m:
+            return '<html xmlns="http://www.w3.org/1999/xhtml" %s>'%raw
+        else:
+            return match.group().sub(m.group('uri'), "http://www.w3.org/1999/xhtml")
+
     def save(self):
         '''
         Save processed HTML into the content directory.
         Should be called after all HTML processing is finished.
         '''
         ans = tostring(self.root, pretty_print=self.opts.pretty_print)
-        ans = re.compile(r'', re.IGNORECASE).sub('', ans[:1000]) + ans[1000:]
+        ans = re.sub(r'(?i)<\s*html(?P<raw>\s+[^>]*){0,1}>', self.declare_xhtml_namespace, ans[:1000]) + ans[1000:]
         ans = re.compile(r'', re.IGNORECASE).sub('\n\t\n', ans[:1000])+ans[1000:]
 
         with open(self.save_path(), 'wb') as f: