From 97734c50398e6097d5fc7e732b63039c2484bb5c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Fri, 4 Mar 2016 16:22:00 +0530
Subject: [PATCH] Start work on in-browser viewer

---
 src/calibre/ebooks/oeb/polish/toc.py |   7 ++
 src/calibre/srv/render_book.py       | 137 +++++++++++++++++++++++++++
 2 files changed, 144 insertions(+)
 create mode 100644 src/calibre/srv/render_book.py
diff --git a/src/calibre/ebooks/oeb/polish/toc.py b/src/calibre/ebooks/oeb/polish/toc.py
index 6e56c39550..66f356b74e 100644
--- a/src/calibre/ebooks/oeb/polish/toc.py
+++ b/src/calibre/ebooks/oeb/polish/toc.py
@@ -91,6 +91,13 @@ class TOC(object):
     def __str__(self):
         return b'\n'.join([x.encode('utf-8') for x in self.get_lines()])
 
+    @property
+    def as_dict(self):
+        ''' This node and, recursively, all of its children as nested dicts. '''
+        node = {key: getattr(self, key) for key in ('title', 'dest', 'frag', 'dest_exists', 'dest_error')}
+        node['children'] = [child.as_dict for child in self.children]
+        return node
+
 def child_xpath(tag, name):
     return tag.xpath('./*[calibre:lower-case(local-name()) = "%s"]'%name)
 
diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py
new file mode 100644
index 0000000000..bd1f1367b2
--- /dev/null
+++ b/src/calibre/srv/render_book.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python2
+# vim:fileencoding=utf-8
+# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
+
+from __future__ import (unicode_literals, division, absolute_import,
+                        print_function)
+import sys, re, os, json
+from functools import partial
+from future_builtins import map
+from urlparse import urlparse
+
+from cssutils import replaceUrls
+from lxml.etree import Comment, tostring
+
+from calibre.ebooks.oeb.base import OEB_DOCS, escape_cdata, OEB_STYLES, rewrite_links, XPath, urlunquote, XLINK
+from calibre.ebooks.oeb.iterator.book import extract_book
+from calibre.ebooks.oeb.polish.container import Container as ContainerBase
+from calibre.ebooks.oeb.polish.toc import get_toc
+from calibre.ebooks.oeb.polish.utils import guess_type
+from calibre.utils.short_uuid import uuid4
+from calibre.utils.logging import default_log
+
+
+def encode_component(x):  # escape the two reserved characters: ',' becomes ',c' and '|' becomes ',p'
+    return ',p'.join(chunk.replace(',', ',c') for chunk in x.split('|'))
+
+def decode_component(x):  # undo encode_component(): ',p' becomes '|' and ',c' becomes ','
+    return ','.join(chunk.replace(',p', '|') for chunk in x.split(',c'))
+
+def encode_url(name, frag=''):
+    ''' Return the encoded name, followed by ',,' and the encoded frag when frag is non-empty. '''
+    encoded = encode_component(name)
+    # ',,' cannot occur inside an encoded component, so it is safe as the separator
+    return (encoded + ',,' + encode_component(frag)) if frag else encoded
+
+def decode_url(x):
+    ''' Inverse of encode_url(): return the decoded [name, frag] list, with frag '' when x carries none. '''
+    # Encoded components never contain ',,', so its first occurrence is the separator
+    name, _, frag = x.partition(',,')
+    return [decode_component(name), decode_component(frag)]
+
+class Container(ContainerBase):
+    ''' An extracted book whose files are rewritten for the in-browser viewer and described by a JSON manifest. '''
+    RENDER_VERSION = 1
+    tweak_mode = True
+
+    def __init__(self, path_to_ebook, tdir, log=None):
+        ''' Extract path_to_ebook into tdir, virtualize its links and write calibre-book-manifest.json into self.root. '''
+        log = log or default_log
+        book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
+        ContainerBase.__init__(self, tdir, opfpath, log)
+        # The OPF, NCX and META-INF/ files are left out of the manifest and deleted after the commit
+        excluded_names = {name for name, mt in self.mime_map.iteritems() if (
+            name == self.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/'))}
+        self.book_render_data = data = {
+            'version': self.RENDER_VERSION,
+            'toc':get_toc(self).as_dict,
+            'spine':[name for name, is_linear in self.spine_names],
+            'link_uid': uuid4(),
+            'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
+            'manifest': list(set(self.name_path_map) - excluded_names),
+        }
+        # Mark the spine as dirty since we have to ensure it is normalized
+        for name in data['spine']:
+            self.parsed(name), self.dirty(name)
+        self.virtualize_resources()
+        self.commit()
+        for name in excluded_names:
+            os.remove(self.name_path_map[name])
+        with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
+            f.write(json.dumps(self.book_render_data, ensure_ascii=False).encode('utf-8'))
+
+    def virtualize_resources(self):
+        ''' Rewrite links to files inside the book as link_uid|encoded name|, marking every rewritten file dirty. '''
+        changed = set()  # names of the files in which at least one link was rewritten
+        link_uid = self.book_render_data['link_uid']
+        resource_template = link_uid + '|{}|'
+        xlink_xpath = XPath('//*[@xl:href]')
+        link_xpath = XPath('//h:a[@href]')
+
+        def link_replacer(base, url):
+            # Virtualize url, as found in the file named base, if it points into the book; record base in changed when it does
+            if url.startswith('#'):
+                frag = urlunquote(url[1:])
+                if not frag:
+                    return url
+                changed.add(base)
+                return resource_template.format(encode_url(base, frag))
+            purl = urlparse(url)
+            if purl.netloc or purl.query:
+                return url
+            if purl.scheme and purl.scheme != 'file':
+                return url
+            if not purl.path or purl.path.startswith('/'):
+                return url
+            url, frag = purl.path, purl.fragment
+            name = self.href_to_name(url, base)
+            if name:
+                frag = urlunquote(frag)
+                url = resource_template.format(encode_url(name, frag))
+                changed.add(base)
+            return url
+
+        for name, mt in self.mime_map.iteritems():
+            if mt in OEB_STYLES:
+                replaceUrls(self.parsed(name), partial(link_replacer, name))
+            elif mt in OEB_DOCS:
+                root = self.parsed(name)
+                rewrite_links(root, partial(link_replacer, name))
+                for a in link_xpath(root):
+                    href = a.get('href')
+                    if href.startswith(link_uid):
+                        a.set('href', 'javascript:void(0)')
+                        a.set('data-' + link_uid, href.split('|')[1])
+                    else:
+                        a.set('target', '_blank')
+                    changed.add(name)
+            elif mt == 'image/svg+xml':  # link_replacer adds name to changed itself; the set must never be rebound here
+                xlink = XLINK('href')
+                for elem in xlink_xpath(self.parsed(name)):
+                    elem.set(xlink, link_replacer(name, elem.get(xlink)))
+
+        tuple(map(self.dirty, changed))  # map is the lazy future_builtins one; tuple() forces the dirty() calls
+
+    def serialize_item(self, name):
+        ''' Serialize OEB_DOCS items without comments and with an HTML 5 doctype; defer to ContainerBase otherwise. '''
+        if self.mime_map[name] not in OEB_DOCS:
+            return ContainerBase.serialize_item(self, name)
+        from lxml.etree import strip_tags
+        root = self.parsed(name)
+        # Element.remove() would also discard the text following each comment (its tail); strip_tags() keeps it
+        strip_tags(root, Comment)
+        escape_cdata(root)
+        return tostring(root, encoding='utf-8', xml_declaration=True, with_tail=False, doctype='<!DOCTYPE html>')
+
+if __name__ == '__main__':
+    c = Container(sys.argv[-2], sys.argv[-1])  # last two arguments: path_to_ebook, then tdir