From 61e22407f89b83ff0d69b9ae84223daa07386771 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 26 Jan 2009 00:40:17 -0800
Subject: [PATCH] Fix #1695 (Cover Pages in Penguin samples)

---
 src/calibre/ebooks/epub/from_html.py          |  2 ++
 src/calibre/ebooks/html.py                    | 29 +++++++++++--------
 .../ebooks/oeb/transforms/rasterize.py        |  5 ++--
 3 files changed, 22 insertions(+), 14 deletions(-)
diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py
index c358471f09..458fca152c 100644
--- a/src/calibre/ebooks/epub/from_html.py
+++ b/src/calibre/ebooks/epub/from_html.py
@@ -193,6 +193,8 @@ class HTMLProcessor(Processor, Rationalizer):
         for tag in self.root.xpath('//script'):
             if not tag.text and not tag.get('src', False):
                 tag.getparent().remove(tag)
+                
+        
     
     def save(self):
         for meta in list(self.root.xpath('//meta')):
diff --git a/src/calibre/ebooks/html.py b/src/calibre/ebooks/html.py
index f6659f9f51..32601320d4 100644
--- a/src/calibre/ebooks/html.py
+++ b/src/calibre/ebooks/html.py
@@ -417,39 +417,44 @@ class Parser(PreProcessor, LoggingInterface):
         self.level = self.htmlfile.level
         for f in self.htmlfiles:
             name = os.path.basename(f.path)
+            name = os.path.splitext(name)[0] + '.xhtml'
             if name in self.htmlfile_map.values():
                 name = os.path.splitext(name)[0] + '_cr_%d'%save_counter + os.path.splitext(name)[1]
                 save_counter += 1
             self.htmlfile_map[f.path] = name
         
         self.parse_html()
+        # Rewrite the links of <image> tags inside embedded <svg>.
+        # At least one source of EPUB files (Penguin) uses xlink:href without
+        # declaring the xlink namespace, hence the match on any '*:href' attribute.
+        for image in self.root.xpath('//image'): 
+            for attr in image.attrib.keys():
+                if attr.endswith(':href'):
+                    nhref = self.rewrite_links(image.get(attr))
+                    image.set(attr, nhref)
+        
         self.root.rewrite_links(self.rewrite_links, resolve_base_href=False)
         for bad in ('xmlns', 'lang', 'xml:lang'): # lxml also adds these attributes for XHTML documents, leading to duplicates
             if self.root.get(bad, None) is not None:
                 self.root.attrib.pop(bad)
         
+        
+        
     def save_path(self):
         return os.path.join(self.tdir, self.htmlfile_map[self.htmlfile.path])
     
-    def declare_xhtml_namespace(self, match):
-        if not match.group('raw'):
-            return '<html xmlns="http://www.w3.org/1999/xhtml">'
-        raw = match.group('raw')
-        m = re.search(r'(?i)xmlns\s*=\s*[\'"](?P<uri>[^"\']*)[\'"]', raw)
-        if not m:
-            return '<html xmlns="http://www.w3.org/1999/xhtml" %s>'%raw
-        else:
-            return  match.group().replace(m.group('uri'), "http://www.w3.org/1999/xhtml")
-    
     def save(self):
         '''
         Save processed HTML into the content directory.
         Should be called after all HTML processing is finished.
         '''
+        self.root.set('xmlns', 'http://www.w3.org/1999/xhtml')
+        self.root.set('xmlns:xlink', 'http://www.w3.org/1999/xlink')
+        for svg in self.root.xpath('//svg'):
+            svg.set('xmlns', 'http://www.w3.org/2000/svg')
+        
         ans = tostring(self.root, pretty_print=self.opts.pretty_print)
-        ans = re.sub(r'(?i)<\s*html(?P<raw>\s+[^>]*){0,1}>', self.declare_xhtml_namespace, ans[:1000]) + ans[1000:]
         ans = re.compile(r'<head>', re.IGNORECASE).sub('<head>\n\t<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />\n', ans[:1000])+ans[1000:]
-            
         with open(self.save_path(), 'wb') as f:
             f.write(ans)
             return f.name
diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py
index 69f1d0d133..97d73d3dcb 100644
--- a/src/calibre/ebooks/oeb/transforms/rasterize.py
+++ b/src/calibre/ebooks/oeb/transforms/rasterize.py
@@ -46,9 +46,10 @@ class SVGRasterizer(object):
         data = QByteArray(xml2str(elem))
         svg = QSvgRenderer(data)
         size = svg.defaultSize()
+        view_box = elem.get('viewBox', elem.get('viewbox', None))
         if size.width() == 100 and size.height() == 100 \
-           and 'viewBox' in elem.attrib:
-            box = [float(x) for x in elem.attrib['viewBox'].split()]
+           and view_box is not None:
+            box = [float(x) for x in view_box.split()]
             size.setWidth(box[2] - box[0])
             size.setHeight(box[3] - box[1])
         if width or height: