From 5372b390d7e0ad49e243507df8cc9718492f904c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Sat, 26 Mar 2016 10:36:51 +0530
Subject: [PATCH] When rendering books serialize HTML as JSON

Can be used to easily dynamically populate the DOM using the DOM API.
Need to do this since document.write() is flaky in some browsers
---
 src/calibre/srv/render_book.py | 89 ++++++++++++++++++++++++----------
 1 file changed, 64 insertions(+), 25 deletions(-)

diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py
index 4817654372..6db12fc588 100644
--- a/src/calibre/srv/render_book.py
+++ b/src/calibre/srv/render_book.py
@@ -5,15 +5,16 @@
 from __future__ import (unicode_literals, division, absolute_import,
                         print_function)
 import sys, re, os, json
+from collections import defaultdict
+from itertools import count
 from functools import partial
 from future_builtins import map
 from urlparse import urlparse
 
 from cssutils import replaceUrls
-from lxml.etree import Comment, tostring
 
 from calibre.ebooks.oeb.base import (
-    OEB_DOCS, escape_cdata, OEB_STYLES, rewrite_links, XPath, urlunquote, XLINK, XHTML)
+    OEB_DOCS, OEB_STYLES, rewrite_links, XPath, urlunquote, XLINK, XHTML_NS)
 from calibre.ebooks.oeb.iterator.book import extract_book
 from calibre.ebooks.oeb.polish.container import Container as ContainerBase
 from calibre.ebooks.oeb.polish.cover import set_epub_cover, find_cover_image
@@ -72,11 +73,16 @@ class Container(ContainerBase):
         # Mark the spine as dirty since we have to ensure it is normalized
         for name in data['spine']:
             self.parsed(name), self.dirty(name)
-        self.inject_script(data['spine'])
         self.virtualized_names = set()
         self.virtualize_resources()
         def manifest_data(name):
-            return {'size':os.path.getsize(self.name_path_map[name]), 'is_virtualized': name in self.virtualized_names, 'mimetype':self.mime_map.get(name)}
+            mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
+            return {
+                'size':os.path.getsize(self.name_path_map[name]),
+                'is_virtualized': name in self.virtualized_names,
+                'mimetype':mt,
+                'is_html': mt in OEB_DOCS
+            }
         data['files'] = {name:manifest_data(name) for name in set(self.name_path_map) - excluded_names}
         self.commit()
         for name in excluded_names:
@@ -101,20 +107,6 @@ class Container(ContainerBase):
         self.dirty(self.opf_name)
         return raster_cover_name, titlepage_name
 
-    def inject_script(self, spine):
-        src = 'injected-script-' + self.book_render_data['link_uid']
-        for name in spine:
-            root = self.parsed(name)
-            head = tuple(root.iterchildren(XHTML('head')))
-            head = head[0] if head else root.makeelement(XHTML('head'))
-            root.insert(0, head)
-            script = root.makeelement(XHTML('script'))
-            script.set('type', 'text/javascript')
-            script.set('src', src)
-            script.set('data-secret', 'secret-key-' + self.book_render_data['link_uid'])
-            head.insert(0, script)
-            self.dirty(name)
-
     def virtualize_resources(self):
 
         changed = set()
@@ -146,6 +138,7 @@ class Container(ContainerBase):
             return url
 
         for name, mt in self.mime_map.iteritems():
+            mt = mt.lower()
             if mt in OEB_STYLES:
                 replaceUrls(self.parsed(name), partial(link_replacer, name))
                 self.virtualized_names.add(name)
@@ -157,7 +150,8 @@ class Container(ContainerBase):
                     href = a.get('href')
                     if href.startswith(link_uid):
                         a.set('href', 'javascript:void(0)')
-                        a.set('data-' + link_uid, href.split('|')[1])
+                        parts = decode_url(href.split('|')[1])
+                        a.set('data-' + link_uid, json.dumps({'name':parts[0], 'frag':parts[1]}, ensure_ascii=False))
                     else:
                         a.set('target', '_blank')
                     changed.add(name)
@@ -171,15 +165,60 @@ class Container(ContainerBase):
         tuple(map(self.dirty, changed))
 
     def serialize_item(self, name):
-        mt = self.mime_map[name]
+        mt = (self.mime_map[name] or '').lower()
         if mt not in OEB_DOCS:
             return ContainerBase.serialize_item(self, name)
-        # Normalize markup
         root = self.parsed(name)
-        for comment in tuple(root.iterdescendants(Comment)):
-            comment.getparent().remove(comment)
-        escape_cdata(root)
-        return tostring(root, encoding='utf-8', xml_declaration=True, with_tail=False, doctype='<!DOCTYPE html>')
+        return json.dumps(html_as_dict(root), ensure_ascii=False, separators=(',', ':')).encode('utf-8')
+
+def split_name(name):
+    l, r = name.partition('}')[::2]
+    if r:
+        return l[1:], r
+    return None, l
+
+def serialize_elem(elem, nsmap):
+    ns, name = split_name(elem.tag)
+    attribs = []
+    ans = {'n':name}
+    if elem.text:
+        ans['te'] = elem.text
+    if elem.tail:
+        ans['ta'] = elem.tail
+    if ns:
+        ns = nsmap[ns]
+        if ns:
+            ans['ns'] = ns
+    for attr, val in elem.items():
+        attr_ns, aname = split_name(attr)
+        s = {'n':aname, 'v':val}
+        if attr_ns:
+            attr_ns = nsmap[attr_ns]
+            if attr_ns:
+                s['ns'] = attr_ns
+        attribs.append(s)
+    if attribs:
+        ans['a'] = attribs
+    return ans
+
+def html_as_dict(root):
+    nsmap = defaultdict(count().next)
+    nsmap[XHTML_NS]
+    tags = [serialize_elem(root, nsmap)]
+    tree = {'t':0}
+    stack = [(root, tree)]
+    while stack:
+        elem, node = stack.pop()
+        for i, child in enumerate(elem.iterchildren('*')):
+            if i == 0:
+                node['c'] = []
+            cnode = serialize_elem(child, nsmap)
+            tags.append(cnode)
+            tree_node = {'t':len(tags) - 1}
+            node['c'].append(tree_node)
+            stack.append((child, tree_node))
+    ns_map = [ns for ns, nsnum in sorted(nsmap.iteritems(), key=lambda x: x[1])]
+    return {'ns_map':ns_map, 'tag_map':tags, 'tree':tree}
 
 def render(pathtoebook, output_dir, book_hash=None):
     Container(pathtoebook, output_dir, book_hash=book_hash)