When rendering books serialize HTML as JSON

Can be used to easily dynamically populate the DOM using the DOM API.
Need to do this since document.write() is flaky in some browsers
This commit is contained in:
Kovid Goyal 2016-03-26 10:36:51 +05:30
parent 49962d34da
commit 5372b390d7

View File

@ -5,15 +5,16 @@
from __future__ import (unicode_literals, division, absolute_import, from __future__ import (unicode_literals, division, absolute_import,
print_function) print_function)
import sys, re, os, json import sys, re, os, json
from collections import defaultdict
from itertools import count
from functools import partial from functools import partial
from future_builtins import map from future_builtins import map
from urlparse import urlparse from urlparse import urlparse
from cssutils import replaceUrls from cssutils import replaceUrls
from lxml.etree import Comment, tostring
from calibre.ebooks.oeb.base import ( from calibre.ebooks.oeb.base import (
OEB_DOCS, escape_cdata, OEB_STYLES, rewrite_links, XPath, urlunquote, XLINK, XHTML) OEB_DOCS, OEB_STYLES, rewrite_links, XPath, urlunquote, XLINK, XHTML_NS)
from calibre.ebooks.oeb.iterator.book import extract_book from calibre.ebooks.oeb.iterator.book import extract_book
from calibre.ebooks.oeb.polish.container import Container as ContainerBase from calibre.ebooks.oeb.polish.container import Container as ContainerBase
from calibre.ebooks.oeb.polish.cover import set_epub_cover, find_cover_image from calibre.ebooks.oeb.polish.cover import set_epub_cover, find_cover_image
@ -72,11 +73,16 @@ class Container(ContainerBase):
# Mark the spine as dirty since we have to ensure it is normalized # Mark the spine as dirty since we have to ensure it is normalized
for name in data['spine']: for name in data['spine']:
self.parsed(name), self.dirty(name) self.parsed(name), self.dirty(name)
self.inject_script(data['spine'])
self.virtualized_names = set() self.virtualized_names = set()
self.virtualize_resources() self.virtualize_resources()
def manifest_data(name): def manifest_data(name):
return {'size':os.path.getsize(self.name_path_map[name]), 'is_virtualized': name in self.virtualized_names, 'mimetype':self.mime_map.get(name)} mt = (self.mime_map.get(name) or 'application/octet-stream').lower()
return {
'size':os.path.getsize(self.name_path_map[name]),
'is_virtualized': name in self.virtualized_names,
'mimetype':mt,
'is_html': mt in OEB_DOCS
}
data['files'] = {name:manifest_data(name) for name in set(self.name_path_map) - excluded_names} data['files'] = {name:manifest_data(name) for name in set(self.name_path_map) - excluded_names}
self.commit() self.commit()
for name in excluded_names: for name in excluded_names:
@ -101,20 +107,6 @@ class Container(ContainerBase):
self.dirty(self.opf_name) self.dirty(self.opf_name)
return raster_cover_name, titlepage_name return raster_cover_name, titlepage_name
def inject_script(self, spine):
src = 'injected-script-' + self.book_render_data['link_uid']
for name in spine:
root = self.parsed(name)
head = tuple(root.iterchildren(XHTML('head')))
head = head[0] if head else root.makeelement(XHTML('head'))
root.insert(0, head)
script = root.makeelement(XHTML('script'))
script.set('type', 'text/javascript')
script.set('src', src)
script.set('data-secret', 'secret-key-' + self.book_render_data['link_uid'])
head.insert(0, script)
self.dirty(name)
def virtualize_resources(self): def virtualize_resources(self):
changed = set() changed = set()
@ -146,6 +138,7 @@ class Container(ContainerBase):
return url return url
for name, mt in self.mime_map.iteritems(): for name, mt in self.mime_map.iteritems():
mt = mt.lower()
if mt in OEB_STYLES: if mt in OEB_STYLES:
replaceUrls(self.parsed(name), partial(link_replacer, name)) replaceUrls(self.parsed(name), partial(link_replacer, name))
self.virtualized_names.add(name) self.virtualized_names.add(name)
@ -157,7 +150,8 @@ class Container(ContainerBase):
href = a.get('href') href = a.get('href')
if href.startswith(link_uid): if href.startswith(link_uid):
a.set('href', 'javascript:void(0)') a.set('href', 'javascript:void(0)')
a.set('data-' + link_uid, href.split('|')[1]) parts = decode_url(href.split('|')[1])
a.set('data-' + link_uid, json.dumps({'name':parts[0], 'frag':parts[1]}, ensure_ascii=False))
else: else:
a.set('target', '_blank') a.set('target', '_blank')
changed.add(name) changed.add(name)
@ -171,15 +165,60 @@ class Container(ContainerBase):
tuple(map(self.dirty, changed)) tuple(map(self.dirty, changed))
def serialize_item(self, name): def serialize_item(self, name):
mt = self.mime_map[name] mt = (self.mime_map[name] or '').lower()
if mt not in OEB_DOCS: if mt not in OEB_DOCS:
return ContainerBase.serialize_item(self, name) return ContainerBase.serialize_item(self, name)
# Normalize markup
root = self.parsed(name) root = self.parsed(name)
for comment in tuple(root.iterdescendants(Comment)): return json.dumps(html_as_dict(root), ensure_ascii=False, separators=(',', ':')).encode('utf-8')
comment.getparent().remove(comment)
escape_cdata(root) def split_name(name):
return tostring(root, encoding='utf-8', xml_declaration=True, with_tail=False, doctype='<!DOCTYPE html>') l, r = name.partition('}')[::2]
if r:
return l[1:], r
return None, l
def serialize_elem(elem, nsmap):
ns, name = split_name(elem.tag)
attribs = []
ans = {'n':name}
if elem.text:
ans['te'] = elem.text
if elem.tail:
ans['ta'] = elem.tail
if ns:
ns = nsmap[ns]
if ns:
ans['ns'] = ns
for attr, val in elem.items():
attr_ns, aname = split_name(attr)
s = {'n':aname, 'v':val}
if attr_ns:
attr_ns = nsmap[attr_ns]
if attr_ns:
s['ns'] = attr_ns
attribs.append(s)
if attribs:
ans['a'] = attribs
return ans
def html_as_dict(root):
nsmap = defaultdict(count().next)
nsmap[XHTML_NS]
tags = [serialize_elem(root, nsmap)]
tree = {'t':0}
stack = [(root, tree)]
while stack:
elem, node = stack.pop()
for i, child in enumerate(elem.iterchildren('*')):
if i == 0:
node['c'] = []
cnode = serialize_elem(child, nsmap)
tags.append(cnode)
tree_node = {'t':len(tags) - 1}
node['c'].append(tree_node)
stack.append((child, tree_node))
ns_map = [ns for ns, nsnum in sorted(nsmap.iteritems(), key=lambda x: x[1])]
return {'ns_map':ns_map, 'tag_map':tags, 'tree':tree}
def render(pathtoebook, output_dir, book_hash=None): def render(pathtoebook, output_dir, book_hash=None):
Container(pathtoebook, output_dir, book_hash=book_hash) Container(pathtoebook, output_dir, book_hash=book_hash)