When rendering books serialize HTML as JSON

Can be used to easily dynamically populate the DOM using the DOM API. Need to do this since document.write() is flaky in some browsers
2025-07-09 03:04:10 -04:00 · 2016-03-26 10:36:51 +05:30 · 2016-03-26 10:36:51 +05:30 · 5372b390d7
commit 5372b390d7
parent 49962d34da
1 changed files with 64 additions and 25 deletions
--- a/src/calibre/srv/render_book.py
+++ b/src/calibre/srv/render_book.py
@ -5,15 +5,16 @@
 from __future__ import (unicode_literals, division, absolute_import,
                        print_function)
 import sys, re, os, json
 from collections import defaultdict
 from itertools import count
 from functools import partial
 from future_builtins import map
 from urlparse import urlparse
 from cssutils import replaceUrls
 from lxml.etree import Comment, tostring
 from calibre.ebooks.oeb.base import (
-    OEB_DOCS, escape_cdata, OEB_STYLES, rewrite_links, XPath, urlunquote, XLINK, XHTML)
+    OEB_DOCS, OEB_STYLES, rewrite_links, XPath, urlunquote, XLINK, XHTML_NS)
 from calibre.ebooks.oeb.iterator.book import extract_book
 from calibre.ebooks.oeb.polish.container import Container as ContainerBase
 from calibre.ebooks.oeb.polish.cover import set_epub_cover, find_cover_image
@ -72,11 +73,16 @@ class Container(ContainerBase):
        # Mark the spine as dirty since we have to ensure it is normalized
        for name in data['spine']:
            self.parsed(name), self.dirty(name)
        self.inject_script(data['spine'])
        self.virtualized_names = set()
        self.virtualize_resources()
        def manifest_data(name):
-            return {'size':os.path.getsize(self.name_path_map[name]), 'is_virtualized': name in self.virtualized_names, 'mimetype':self.mime_map.get(name)}
+            mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
            return {
                'size':os.path.getsize(self.name_path_map[name]),
                'is_virtualized': name in self.virtualized_names,
                'mimetype':mt,
                'is_html': mt in OEB_DOCS
            }
        data['files'] = {name:manifest_data(name) for name in set(self.name_path_map) - excluded_names}
        self.commit()
        for name in excluded_names:
@ -101,20 +107,6 @@ class Container(ContainerBase):
        self.dirty(self.opf_name)
        return raster_cover_name, titlepage_name
    def inject_script(self, spine):
        src = 'injected-script-' + self.book_render_data['link_uid']
        for name in spine:
            root = self.parsed(name)
            head = tuple(root.iterchildren(XHTML('head')))
            head = head[0] if head else root.makeelement(XHTML('head'))
            root.insert(0, head)
            script = root.makeelement(XHTML('script'))
            script.set('type', 'text/javascript')
            script.set('src', src)
            script.set('data-secret', 'secret-key-' + self.book_render_data['link_uid'])
            head.insert(0, script)
            self.dirty(name)
    def virtualize_resources(self):
        changed = set()
@ -146,6 +138,7 @@ class Container(ContainerBase):
            return url
        for name, mt in self.mime_map.iteritems():
            mt = mt.lower()
            if mt in OEB_STYLES:
                replaceUrls(self.parsed(name), partial(link_replacer, name))
                self.virtualized_names.add(name)
@ -157,7 +150,8 @@ class Container(ContainerBase):
                    href = a.get('href')
                    if href.startswith(link_uid):
                        a.set('href', 'javascript:void(0)')
-                        a.set('data-' + link_uid, href.split('|')[1])
+                        parts = decode_url(href.split('|')[1])
                        a.set('data-' + link_uid, json.dumps({'name':parts[0], 'frag':parts[1]}, ensure_ascii=False))
                    else:
                        a.set('target', '_blank')
                    changed.add(name)
@ -171,15 +165,60 @@ class Container(ContainerBase):
        tuple(map(self.dirty, changed))
    def serialize_item(self, name):
-        mt = self.mime_map[name]
+        mt = (self.mime_map[name] or '').lower()
        if mt not in OEB_DOCS:
            return ContainerBase.serialize_item(self, name)
        # Normalize markup
        root = self.parsed(name)
-        for comment in tuple(root.iterdescendants(Comment)):
+        return json.dumps(html_as_dict(root), ensure_ascii=False, separators=(',', ':')).encode('utf-8')
-            comment.getparent().remove(comment)
+
-        escape_cdata(root)
+def split_name(name):
-        return tostring(root, encoding='utf-8', xml_declaration=True, with_tail=False, doctype='<!DOCTYPE html>')
+    l, r = name.partition('}')[::2]
    if r:
        return l[1:], r
    return None, l
 def serialize_elem(elem, nsmap):
    ns, name = split_name(elem.tag)
    attribs = []
    ans = {'n':name}
    if elem.text:
        ans['te'] = elem.text
    if elem.tail:
        ans['ta'] = elem.tail
    if ns:
        ns = nsmap[ns]
        if ns:
            ans['ns'] = ns
    for attr, val in elem.items():
        attr_ns, aname = split_name(attr)
        s = {'n':aname, 'v':val}
        if attr_ns:
            attr_ns = nsmap[attr_ns]
            if attr_ns:
                s['ns'] = attr_ns
        attribs.append(s)
    if attribs:
        ans['a'] = attribs
    return ans
 def html_as_dict(root):
    nsmap = defaultdict(count().next)
    nsmap[XHTML_NS]
    tags = [serialize_elem(root, nsmap)]
    tree = {'t':0}
    stack = [(root, tree)]
    while stack:
        elem, node = stack.pop()
        for i, child in enumerate(elem.iterchildren('*')):
            if i == 0:
                node['c'] = []
            cnode = serialize_elem(child, nsmap)
            tags.append(cnode)
            tree_node = {'t':len(tags) - 1}
            node['c'].append(tree_node)
            stack.append((child, tree_node))
    ns_map = [ns for ns, nsnum in sorted(nsmap.iteritems(), key=lambda x: x[1])]
    return {'ns_map':ns_map, 'tag_map':tags, 'tree':tree}
 def render(pathtoebook, output_dir, book_hash=None):
    Container(pathtoebook, output_dir, book_hash=book_hash)