Start work on in-browser viewer

This commit is contained in:
Kovid Goyal 2016-03-04 16:22:00 +05:30
parent 0cc6c5d901
commit 97734c5039
2 changed files with 144 additions and 0 deletions

View File

@ -91,6 +91,13 @@ class TOC(object):
def __str__(self):
    """Return the lines from get_lines(), UTF-8 encoded and joined by newlines."""
    encoded_lines = []
    for line in self.get_lines():
        encoded_lines.append(line.encode('utf-8'))
    return b'\n'.join(encoded_lines)
@property
def as_dict(self):
    """Return this node and, recursively, its children as nested plain dicts.

    The result has the keys 'title', 'dest', 'frag', 'dest_exists' and
    'dest_error' copied from the same-named attributes, plus 'children',
    a list with the as_dict value of every child node.
    """
    scalar_fields = ('title', 'dest', 'frag', 'dest_exists', 'dest_error')
    node = {field: getattr(self, field) for field in scalar_fields}
    node['children'] = [child.as_dict for child in self.children]
    return node
def child_xpath(tag, name):
    """Run an XPath query on tag for direct children named name.

    The query compares calibre:lower-case(local-name()) of each child with
    name, and the result of tag.xpath() is returned unchanged.
    """
    expression = './*[calibre:lower-case(local-name()) = "%s"]' % name
    return tag.xpath(expression)

View File

@ -0,0 +1,137 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import (unicode_literals, division, absolute_import,
print_function)
import sys, re, os, json
from functools import partial
from future_builtins import map
from urlparse import urlparse
from cssutils import replaceUrls
from lxml.etree import Comment, tostring
from calibre.ebooks.oeb.base import OEB_DOCS, escape_cdata, OEB_STYLES, rewrite_links, XPath, urlunquote, XLINK
from calibre.ebooks.oeb.iterator.book import extract_book
from calibre.ebooks.oeb.polish.container import Container as ContainerBase
from calibre.ebooks.oeb.polish.toc import get_toc
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.utils.short_uuid import uuid4
from calibre.utils.logging import default_log
def encode_component(x):
    """Escape the characters ',' and '|' in x.

    ',' becomes ',c' and '|' becomes ',p', so the result never contains '|'
    and every ',' in it starts a two character escape.
    """
    # Commas go first: the pipe escape itself introduces a comma that must
    # not be escaped a second time.
    commas_escaped = x.replace(',', ',c')
    return commas_escaped.replace('|', ',p')
def decode_component(x):
    """Undo encode_component(): turn ',p' back into '|' and ',c' back into ','."""
    # Pipes are restored first. Doing the comma step first would turn the
    # encoded form of ',p' (which is ',cp') into a spurious ',p' escape.
    pipes_restored = x.replace(',p', '|')
    return pipes_restored.replace(',c', ',')
def encode_url(name, frag=''):
    """Combine a file name and an optional fragment into one encoded string.

    Both parts are passed through encode_component(). When frag is non-empty
    it is appended after a ',,' separator; otherwise only the encoded name is
    returned.
    """
    encoded_name = encode_component(name)
    if not frag:
        return encoded_name
    return encoded_name + ',,' + encode_component(frag)
def decode_url(x):
    """Split a string produced by encode_url() back into [name, frag].

    The split happens at the first ',,'. Both parts are passed through
    decode_component(), and frag is '' when x contains no separator.
    """
    encoded_name, _separator, encoded_frag = x.partition(',,')
    return [decode_component(encoded_name), decode_component(encoded_frag)]
class Container(ContainerBase):
    """Container that prepares an extracted book for the in-browser viewer.

    Constructing an instance extracts the book, rewrites links to resources
    inside the book into a virtual form, removes files listed in neither the
    spine nor the manifest data, and writes calibre-book-manifest.json into
    the container root.
    """

    # Stored under the 'version' key of the generated manifest.
    RENDER_VERSION = 1

    # NOTE(review): consumed by ContainerBase; its exact effect is not visible
    # from this file.
    tweak_mode = True

    def __init__(self, path_to_ebook, tdir, log=None):
        """Extract path_to_ebook and build the render data for it.

        :param path_to_ebook: Path of the book, passed on to extract_book().
        :param tdir: Directory passed to extract_book() and used as the root
            directory of this container.
        :param log: Log object; default_log is used when it is falsy.
        """
        log = log or default_log
        book_fmt, opfpath, input_fmt = extract_book(path_to_ebook, tdir, log=log)
        ContainerBase.__init__(self, tdir, opfpath, log)
        # Files left out of the manifest and deleted below: the OPF itself,
        # anything with the NCX mime type and everything under META-INF/.
        excluded_names = {
            name for name, mt in self.mime_map.iteritems() if
            name == self.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/')
        }
        self.book_render_data = data = {
            'version': self.RENDER_VERSION,
            'toc': get_toc(self).as_dict,
            'spine': [name for name, is_linear in self.spine_names],
            'link_uid': uuid4(),
            'is_comic': input_fmt.lower() in {'cbc', 'cbz', 'cbr', 'cb7'},
            'manifest': list(set(self.name_path_map) - excluded_names),
        }
        # Mark the spine as dirty since we have to ensure it is normalized
        for name in data['spine']:
            self.parsed(name)
            self.dirty(name)
        self.virtualize_resources()
        self.commit()
        for name in excluded_names:
            os.remove(self.name_path_map[name])
        with lopen(os.path.join(self.root, 'calibre-book-manifest.json'), 'wb') as f:
            f.write(json.dumps(self.book_render_data, ensure_ascii=False).encode('utf-8'))

    def virtualize_resources(self):
        """Rewrite links to files inside the book into a virtual form.

        A link that resolves to a file in the book becomes
        ``<link_uid>|<encode_url(name, frag)>|``. Links in stylesheets, HTML
        documents and SVG images are processed. In HTML documents, <a>
        elements pointing at a virtualized target get a 'javascript:void(0)'
        href plus a ``data-<link_uid>`` attribute holding the encoded target;
        all other <a> elements get target="_blank". Every file that was
        modified is marked dirty.
        """
        changed = set()
        link_uid = self.book_render_data['link_uid']
        resource_template = link_uid + '|{}|'
        xlink_xpath = XPath('//*[@xl:href]')
        link_xpath = XPath('//h:a[@href]')

        def link_replacer(base, url):
            # Return the virtualized form of url as found in the file named
            # base, or url itself when it should be left alone. Files whose
            # links get rewritten are recorded in changed.
            if url.startswith('#'):
                # Link to a location inside the same file
                frag = urlunquote(url[1:])
                if not frag:
                    return url
                changed.add(base)
                return resource_template.format(encode_url(base, frag))
            purl = urlparse(url)
            # Leave alone anything with a host, a query, a scheme other than
            # file:, no path at all or an absolute path.
            if purl.netloc or purl.query:
                return url
            if purl.scheme and purl.scheme != 'file':
                return url
            if not purl.path or purl.path.startswith('/'):
                return url
            url, frag = purl.path, purl.fragment
            name = self.href_to_name(url, base)
            if name:
                frag = urlunquote(frag)
                url = resource_template.format(encode_url(name, frag))
                changed.add(base)
            return url

        for name, mt in self.mime_map.iteritems():
            if mt in OEB_STYLES:
                replaceUrls(self.parsed(name), partial(link_replacer, name))
            elif mt in OEB_DOCS:
                root = self.parsed(name)
                rewrite_links(root, partial(link_replacer, name))
                for a in link_xpath(root):
                    href = a.get('href')
                    if href.startswith(link_uid):
                        a.set('href', 'javascript:void(0)')
                        # href is <link_uid>|<encoded target>| and the encoded
                        # target never contains '|', so index 1 is the target
                        a.set('data-' + link_uid, href.split('|')[1])
                    else:
                        a.set('target', '_blank')
                    changed.add(name)
            elif mt == 'image/svg+xml':
                # changed must remain the set shared with link_replacer, which
                # records this file itself whenever it rewrites a link.
                xlink = XLINK('href')
                for elem in xlink_xpath(self.parsed(name)):
                    elem.set(xlink, link_replacer(name, elem.get(xlink)))

        for name in changed:
            self.dirty(name)

    def serialize_item(self, name):
        """Serialize the file name, normalizing markup for HTML documents.

        Files whose mime type is not in OEB_DOCS are serialized by
        ContainerBase. For documents, all comments are removed,
        escape_cdata() is applied and the tree is serialized as UTF-8 with an
        XML declaration and a ``<!DOCTYPE html>`` doctype.
        """
        mt = self.mime_map[name]
        if mt not in OEB_DOCS:
            return ContainerBase.serialize_item(self, name)
        # Normalize markup
        root = self.parsed(name)
        # Iterate over a snapshot since the tree is modified in the loop
        for comment in tuple(root.iterdescendants(Comment)):
            parent = comment.getparent()
            tail = comment.tail
            if tail:
                # Removing a node from an lxml tree also drops its tail text,
                # so move the text that follows the comment to the preceding
                # node (or to the parent) before removing the comment.
                previous = comment.getprevious()
                if previous is None:
                    parent.text = (parent.text or '') + tail
                else:
                    previous.tail = (previous.tail or '') + tail
            parent.remove(comment)
        escape_cdata(root)
        return tostring(root, encoding='utf-8', xml_declaration=True, with_tail=False, doctype='<!DOCTYPE html>')
if __name__ == '__main__':
    # Ad-hoc command line entry point: the last two command line arguments
    # are passed to Container as path_to_ebook and tdir respectively.
    c = Container(sys.argv[-2], sys.argv[-1])