From 5bc925735b56ff8c1a59ec670bf4b639dcdd7206 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Thu, 31 Mar 2016 23:29:14 +0530
Subject: [PATCH] More work on rendering books

---
Review notes:
- unserialize_html: the final else branch assigned True to `proceed` (the
  callback parameter) and then called it; it now sets the `proceeded` flag
  so the callback itself is invoked.
- unserialize_html.onload: no longer calls proceed() a second time when
  hangcheck() has already called it.
- ensure_head/ensure_body: restored the tag names in the comments.
- Line breaks and indentation of this patch were restored from a collapsed
  copy; blank context lines and context indentation were inferred from the
  hunk line counts and should be checked against the tree before applying.

 src/calibre/srv/render_book.py  | 51 ++++++++++++++----
 src/pyj/read_book/iframe.pyj    |  7 ++-
 src/pyj/read_book/resources.pyj | 92 +++++++++++++++++++++++++++++++++
 3 files changed, 139 insertions(+), 11 deletions(-)

diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py
index 78849f10fd..8dc9f744a5 100644
--- a/src/calibre/srv/render_book.py
+++ b/src/calibre/srv/render_book.py
@@ -15,7 +15,7 @@ from urlparse import urlparse
 from cssutils import replaceUrls
 
 from calibre.ebooks.oeb.base import (
-    OEB_DOCS, OEB_STYLES, rewrite_links, XPath, urlunquote, XLINK, XHTML_NS, OPF)
+    OEB_DOCS, OEB_STYLES, rewrite_links, XPath, urlunquote, XLINK, XHTML_NS, OPF, XHTML)
 from calibre.ebooks.oeb.iterator.book import extract_book
 from calibre.ebooks.oeb.polish.container import Container as ContainerBase
 from calibre.ebooks.oeb.polish.cover import set_epub_cover, find_cover_image
@@ -188,6 +188,8 @@ boolean_attributes = frozenset('allowfullscreen,async,autofocus,autoplay,checked
 
 def serialize_elem(elem, nsmap):
     ns, name = split_name(elem.tag)
+    if name.lower() in {'img', 'script', 'link', 'image', 'style'}:
+        name = name.lower()
     ans = {'n':name}
     if elem.text:
         ans['x'] = elem.text
@@ -200,12 +202,12 @@ def serialize_elem(elem, nsmap):
     attribs = []
     for attr, val in elem.items():
         attr_ns, aname = split_name(attr)
-        al = attr.lower()
+        al = aname.lower()
         if not attr_ns and al in boolean_attributes:
             if val and val.lower() in (al, ''):
                 attribs.append([al, al])
                 continue
-        attrib = [attr, val]
+        attrib = [aname, val]
         if attr_ns:
             attr_ns = nsmap[attr_ns]
             if attr_ns:
@@ -215,22 +217,53 @@ def serialize_elem(elem, nsmap):
         ans['a'] = attribs
     return ans
 
+def ensure_head(root):
+    # Make sure we have only a single <head>
+    heads = list(root.iterchildren(XHTML('head')))
+    if len(heads) != 1:
+        if not heads:
+            root.insert(0, root.makeelement(XHTML('head')))
+            return
+        head = heads[0]
+        for eh in heads[1:]:
+            for child in eh.iterchildren('*'):
+                head.append(child)
+
+def ensure_body(root):
+    # Make sure we have only a single <body>
+    bodies = list(root.iterchildren(XHTML('body')))
+    if len(bodies) != 1:
+        if not bodies:
+            root.append(root.makeelement(XHTML('body')))
+            return
+        body = bodies[0]
+        for b in bodies[1:]:
+            div = root.makeelement(XHTML('div'))
+            div.attrib.update(b.attrib)
+            div.text = b.text
+            for child in b:
+                div.append(child)
+            body.append(div)
+
 def html_as_dict(root):
+    ensure_head(root), ensure_body(root)
+    for child in tuple(root.iterchildren('*')):
+        if child.tag.partition('}')[-1] not in ('head', 'body'):
+            root.remove(child)
+    root.text = root.tail = None
     nsmap = defaultdict(count().next)
     nsmap[XHTML_NS]
     tags = [serialize_elem(root, nsmap)]
-    tree = {'t':0}
+    tree = [0]
     stack = [(root, tree)]
     while stack:
         elem, node = stack.pop()
         for i, child in enumerate(elem.iterchildren('*')):
-            if i == 0:
-                node['c'] = []
             cnode = serialize_elem(child, nsmap)
             tags.append(cnode)
-            tree_node = {'t':len(tags) - 1}
-            node['c'].append(tree_node)
-            stack.append((child, tree_node))
+            child_tree_node = [len(tags)-1]
+            node.append(child_tree_node)
+            stack.append((child, child_tree_node))
     ns_map = [ns for ns, nsnum in sorted(nsmap.iteritems(), key=lambda x: x[1])]
     return {'ns_map':ns_map, 'tag_map':tags, 'tree':tree}
 
diff --git a/src/pyj/read_book/iframe.pyj b/src/pyj/read_book/iframe.pyj
index 0f8ee994b6..cc645ffef5 100644
--- a/src/pyj/read_book/iframe.pyj
+++ b/src/pyj/read_book/iframe.pyj
@@ -4,7 +4,7 @@
 from aes import GCM
 from gettext import install
 from read_book.globals import set_boss
-from read_book.resources import finalize_resources
+from read_book.resources import finalize_resources, unserialize_html
 
 class Boss:
 
@@ -53,7 +53,10 @@ class Boss:
             self.encrypted_communications = True
         self.book = data.book
         root_data = finalize_resources(self.book, data.name, data.resource_data)
-        root_data
+        unserialize_html(root_data, self.content_loaded.bind(self))
+
+    def content_loaded(self):
+        print('Content loaded')
 
     def send_message(self, data):
         if self.encrypted_communications:
diff --git a/src/pyj/read_book/resources.pyj b/src/pyj/read_book/resources.pyj
index 0175ebb07f..fe1c04d9e4 100644
--- a/src/pyj/read_book/resources.pyj
+++ b/src/pyj/read_book/resources.pyj
@@ -1,6 +1,7 @@
 # vim:fileencoding=utf-8
 # License: GPL v3 Copyright: 2016, Kovid Goyal
 
+from dom import clear
 from encodings import base64decode, utf8_decode
 
 JSON_XHTML_MIMETYPE = 'application/calibre+xhtml+json'
@@ -142,3 +143,94 @@ def finalize_resources(book, root_name, resource_data):
             v'delete resource_data[name]'
 
     return root_data
+
+js_types = set('text/javascript text/ecmascript application/javascript application/ecmascript'.split(' '))
+resource_tag_names = {'script':'src', 'link':'href', 'img':'src', 'image':'xlink:href'}
+
+def apply_attributes(src, elem, ns_map):
+    attributes = src.a
+    if not attributes:
+        return
+    for a in attributes:
+        if a[2]:
+            elem.setAttributeNS(ns_map[a[2]], a[0], a[1])
+        else:
+            elem.setAttribute(a[0], a[1])
+
+def process_stack(stack, tag_map, ns_map, load_required, onload):
+    while stack.length:
+        node, parent = stack.pop()
+        src = tag_map[node[0]]
+        if src.s:
+            elem = document.createElementNS(ns_map[src.s], src.n)
+        else:
+            elem = document.createElement(src.n)
+        loadable = False
+        if src.n in resource_tag_names:
+            attr = resource_tag_names[src.n]
+            if attr.indexOf(':') != -1:
+                attr = attr.replace('xlink:', '')
+            for a in (src.a or v'[]'):
+                if a[0] is attr:
+                    loadable = str.startswith(a[1], 'blob:')
+                    break
+        if loadable:
+            load_required.add(node[0])
+            elem.addEventListener('load', onload.bind(node[0]))
+            elem.addEventListener('error', onload.bind(node[0]))
+
+        apply_attributes(src, elem, ns_map)
+        parent.appendChild(elem)
+        if src.x:
+            if src.n is 'script' and (elem.getAttribute('type') or 'text/javascript').toLowerCase() in js_types:
+                elem.text = src.x
+            else:
+                elem.appendChild(document.createTextNode(src.x))
+        if src.l:
+            parent.appendChild(document.createTextNode(src.l))
+        for v'var i = node.length - 1; i >= 1; i--':  # noqa: unused-local
+            stack.push(v'[node[i], elem]')
+
+def unserialize_html(serialized_data, proceed):
+    tag_map = serialized_data.tag_map
+    tree = serialized_data.tree
+    ns_map = serialized_data.ns_map
+    html = tag_map[0]
+    apply_attributes(html, document.documentElement, ns_map)
+    head, body = tree[1], tree[2]  # noqa: unused-local
+    clear(document.head, document.body)
+    load_required = set()
+    proceeded = False
+
+    def hangcheck():
+        nonlocal proceeded
+        if not proceeded:
+            proceeded = True
+            proceed()
+
+    def onload():
+        nonlocal proceeded
+        load_required.discard(this)
+        if not proceeded and not load_required.length:
+            proceeded = True
+            proceed()
+
+    stack = v'[]'
+    for v'var i = head.length - 1; i >= 1; i--':
+        stack.push(v'[head[i], document.head]')
+    process_stack(stack, tag_map, ns_map, load_required, onload)
+    bnode = tag_map[body[0]]
+    apply_attributes(bnode, document.body, ns_map)
+    if bnode.x:
+        document.body.appendChild(document.createTextNode(bnode.x))
+    for v'var i = body.length - 1; i >= 1; i--':  # noqa: unused-local
+        stack.push(v'[body[i], document.body]')
+    process_stack(stack, tag_map, ns_map, load_required, onload)
+    ev = document.createEvent('Event')
+    ev.initEvent('DOMContentLoaded', True, True)
+    document.dispatchEvent(ev)
+    if load_required.length:
+        setTimeout(hangcheck, 5000)
+    else:
+        proceeded = True
+        proceed()