From 97734c50398e6097d5fc7e732b63039c2484bb5c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Fri, 4 Mar 2016 16:22:00 +0530 Subject: [PATCH] Start work on in-browser viewer --- src/calibre/ebooks/oeb/polish/toc.py | 7 ++ src/calibre/srv/render_book.py | 137 +++++++++++++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 src/calibre/srv/render_book.py diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py index 6e56c39550..66f356b74e 100644 --- a/src/calibre/ebooks/oeb/polish/toc.py +++ b/src/calibre/ebooks/oeb/polish/toc.py @@ -91,6 +91,13 @@ class TOC(object): def __str__(self): return b'\n'.join([x.encode('utf-8') for x in self.get_lines()]) + @property + def as_dict(self): + return { + 'title':self.title, 'dest':self.dest, 'frag':self.frag, 'dest_exists':self.dest_exists, 'dest_error':self.dest_error, + 'children':[c.as_dict for c in self.children] + } + def child_xpath(tag, name): return tag.xpath('./*[calibre:lower-case(local-name()) = "%s"]'%name) diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py new file mode 100644 index 0000000000..bd1f1367b2 --- /dev/null +++ b/src/calibre/srv/render_book.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python2 +# vim:fileencoding=utf-8 +# License: GPLv3 Copyright: 2016, Kovid Goyal + +from __future__ import (unicode_literals, division, absolute_import, + print_function) +import sys, re, os, json +from functools import partial +from future_builtins import map +from urlparse import urlparse + +from cssutils import replaceUrls +from lxml.etree import Comment, tostring + +from calibre.ebooks.oeb.base import OEB_DOCS, escape_cdata, OEB_STYLES, rewrite_links, XPath, urlunquote, XLINK +from calibre.ebooks.oeb.iterator.book import extract_book +from calibre.ebooks.oeb.polish.container import Container as ContainerBase +from calibre.ebooks.oeb.polish.toc import get_toc +from calibre.ebooks.oeb.polish.utils import guess_type +from calibre.utils.short_uuid import 
from calibre.utils.short_uuid import uuid4
from calibre.utils.logging import default_log


def encode_component(x):
    '''
    Escape the delimiter characters used in virtualized URLs.

    ',' becomes ',c' and '|' becomes ',p', so the result never contains
    '|' nor two consecutive commas.
    '''
    # Commas must be escaped first, otherwise the commas introduced by the
    # '|' escape would themselves be escaped.
    return x.replace(',', ',c').replace('|', ',p')


def decode_component(x):
    ''' Inverse of :func:`encode_component`. '''
    # ',p' must be decoded first: decoding ',c' first could create a
    # spurious ',p' sequence (e.g. from the encoded form of ',p' itself).
    return x.replace(',p', '|').replace(',c', ',')


def encode_url(name, frag=''):
    '''
    Encode a container name and an optional fragment into a single string.

    The two encoded components are joined by ',,', a sequence that cannot
    occur inside an encoded component.
    '''
    name = encode_component(name)
    if frag:
        name += ',,' + encode_component(frag)
    return name


def decode_url(x):
    '''
    Inverse of :func:`encode_url`.

    :return: A two item list ``[name, frag]``; ``frag`` is the empty string
             when no fragment was encoded.
    '''
    parts = [decode_component(p) for p in x.split(',,', 1)]
    if len(parts) == 1:
        parts.append('')
    return parts


class Container(ContainerBase):
    '''
    Container that extracts a book into a directory and rewrites it for the
    in-browser viewer: links between resources are replaced by virtual URLs
    and a JSON manifest describing the book is written next to the files.
    '''

    # Stored as 'version' in the generated manifest
    RENDER_VERSION = 1
    # Read by ContainerBase; its exact effect is not visible in this module
    tweak_mode = True

    def __init__(self, path_to_ebook, tdir, log=None):
        '''
        :param path_to_ebook: Path to the book file to extract.
        :param tdir: Directory the book is extracted into and processed in.
        :param log: Optional logger, defaults to ``default_log``.
        '''
        log = log or default_log
        book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
        ContainerBase.__init__(self, tdir, opfpath, log)
        ncx_mt = guess_type('a.ncx')
        # The OPF, the NCX and everything under META-INF/ are not shipped:
        # they are left out of the manifest and deleted after the commit.
        excluded_names = {
            name for name, mt in self.mime_map.iteritems() if
            name == self.opf_name or mt == ncx_mt or name.startswith('META-INF/')
        }
        self.book_render_data = data = {
            'version': self.RENDER_VERSION,
            'toc': get_toc(self).as_dict,
            'spine': [name for name, is_linear in self.spine_names],
            'link_uid': uuid4(),
            'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
            'manifest': list(set(self.name_path_map) - excluded_names),
        }
        # Mark the spine as dirty since we have to ensure it is normalized
        for name in data['spine']:
            self.parsed(name)
            self.dirty(name)
        self.virtualize_resources()
        self.commit()
        for name in excluded_names:
            os.remove(self.name_path_map[name])
        with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
            f.write(json.dumps(self.book_render_data, ensure_ascii=False).encode('utf-8'))

    def virtualize_resources(self):
        '''
        Replace links to resources inside the book with virtual URLs of the
        form ``<link_uid>|<encoded name and fragment>|`` in stylesheets, HTML
        and SVG files, and mark every modified file as dirty.

        In HTML files, ``<a>`` tags pointing inside the book get their target
        moved to a ``data-<link_uid>`` attribute and a no-op ``href``; all
        other ``<a>`` tags are made to open in a new window.
        '''
        changed = set()
        link_uid = self.book_render_data['link_uid']
        resource_template = link_uid + '|{}|'
        xlink_xpath = XPath('//*[@xl:href]')
        link_xpath = XPath('//h:a[@href]')

        def link_replacer(base, url):
            # Return the virtual URL for ``url`` (found in the file ``base``)
            # or ``url`` unchanged if it does not point inside the book.
            # ``base`` is recorded in ``changed`` whenever a URL is rewritten.
            if url.startswith('#'):
                frag = urlunquote(url[1:])
                if not frag:
                    return url
                changed.add(base)
                return resource_template.format(encode_url(base, frag))
            purl = urlparse(url)
            if purl.netloc or purl.query:
                return url
            if purl.scheme and purl.scheme != 'file':
                return url
            if not purl.path or purl.path.startswith('/'):
                return url
            url, frag = purl.path, purl.fragment
            name = self.href_to_name(url, base)
            if name:
                frag = urlunquote(frag)
                url = resource_template.format(encode_url(name, frag))
                changed.add(base)
            return url

        for name, mt in self.mime_map.iteritems():
            if mt in OEB_STYLES:
                replaceUrls(self.parsed(name), partial(link_replacer, name))
            elif mt in OEB_DOCS:
                root = self.parsed(name)
                rewrite_links(root, partial(link_replacer, name))
                for a in link_xpath(root):
                    href = a.get('href')
                    if href.startswith(link_uid):
                        a.set('href', 'javascript:void(0)')
                        a.set('data-' + link_uid, href.split('|')[1])
                    else:
                        a.set('target', '_blank')
                changed.add(name)
            elif mt == 'image/svg+xml':
                # Do not rebind ``changed`` here: link_replacer() adds to it
                # and it is iterated below, so it must remain a set.
                xlink = XLINK('href')
                for elem in xlink_xpath(self.parsed(name)):
                    href = elem.get(xlink)
                    # Same-document references (<use>, gradients, ...) must
                    # stay as they are or the SVG can no longer resolve them.
                    if not href.startswith('#'):
                        elem.set(xlink, link_replacer(name, href))

        for name in changed:
            self.dirty(name)

    def serialize_item(self, name):
        '''
        Serialize the file ``name``. HTML files are normalized first: all
        comments are removed and CDATA is escaped. Every other file is
        serialized by ContainerBase.
        '''
        mt = self.mime_map[name]
        if mt not in OEB_DOCS:
            return ContainerBase.serialize_item(self, name)
        # Normalize markup
        root = self.parsed(name)
        for comment in tuple(root.iterdescendants(Comment)):
            parent = comment.getparent()
            tail = comment.tail
            if tail:
                # lxml's remove() also drops the tail text of the removed
                # node, so re-attach the text that followed the comment.
                prev = comment.getprevious()
                if prev is None:
                    parent.text = (parent.text or '') + tail
                else:
                    prev.tail = (prev.tail or '') + tail
            parent.remove(comment)
        escape_cdata(root)
        return tostring(root, encoding='utf-8', xml_declaration=True, with_tail=False, doctype='')


if __name__ == '__main__':
    c = Container(sys.argv[-2], sys.argv[-1])