From 59ecde1963935d563767acec7cb3f279782e0547 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 20 Oct 2008 10:38:55 -0700
Subject: [PATCH] Fix #1187 (Producing blank ePub files.)

---
 src/calibre/ebooks/html.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index 64168823d5..e10aa9a5b3 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -421,13 +421,23 @@ class Parser(PreProcessor, LoggingInterface):
     def save_path(self):
         return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
     
+    def declare_xhtml_namespace(self, match):
+        '''Return the matched <html> start tag with its default xmlns forced to XHTML.'''
+        xhtml, raw = 'http://www.w3.org/1999/xhtml', match.group('raw')
+        m = raw and re.search(r'(?i)xmlns\s*=\s*[\'"](?P<uri>[^"\']*)[\'"]', raw)
+        if not m:
+            return '<html xmlns="%s"%s>' % (xhtml, ' ' + raw if raw else '')
+        # Splice by offset: str has no .sub(), and .replace() could touch other attributes.
+        tag, shift = match.group(), match.start('raw') - match.start()
+        return tag[:shift + m.start('uri')] + xhtml + tag[shift + m.end('uri'):]
+    
     def save(self):
         '''
         Save processed HTML into the content directory.
         Should be called after all HTML processing is finished.
         '''
         ans = tostring(self.root, pretty_print=self.opts.pretty_print)
-        ans = re.compile(r'<html>', re.IGNORECASE).sub('<html xmlns="http://www.w3.org/1999/xhtml">', ans[:1000]) + ans[1000:]
+        ans = re.sub(r'(?i)<\s*html(?P<raw>\s+[^>]*){0,1}>', self.declare_xhtml_namespace, ans[:1000]) + ans[1000:]
         ans = re.compile(r'<head>', re.IGNORECASE).sub('<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
             
         with open(self.save_path(), 'wb') as f: