calibre/src/pyj/read_book/resources.pyj

# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from __python__ import hash_literals

from elementmaker import E
from encodings import base64decode, utf8_decode

from dom import clear, remove_all_attributes
from read_book.globals import runtime, ui_operations
from read_book.settings import opts

# MIME type string which, judging by its name and value, labels XHTML that has
# been serialized as JSON. It is not referenced by the other definitions
# visible in this file (presumably imported elsewhere -- verify).
JSON_XHTML_MIMETYPE = 'application/calibre+xhtml+json'

def decode_component(x):
    # Undo the two encoding layers in order: base64 first, then UTF-8.
    raw = base64decode(x)
    return utf8_decode(raw)

def decode_url(x):
    # Split an encoded URL of the form <encoded name>[#<fragment>] into a
    # (decoded name, fragment) pair. The fragment is '' when there is none.
    #
    # The native JavaScript String.prototype.split() treats its second
    # argument as a cap on the number of pieces returned, not as a maximum
    # number of splits, so x.split('#', 1) only ever yields the part before
    # the first '#' and silently loses the fragment. Cut the string at the
    # first '#' by hand instead.
    pos = x.indexOf('#')
    if pos < 0:
        return decode_component(x), ''
    return decode_component(x[:pos]), x[pos + 1:]

def create_link_pat(book):
    # Global regex matching <link_uid>|<payload>| placeholders; the payload
    # (everything between the two pipes) is captured as group 1.
    source = book.manifest.link_uid + r'\|([^|]+)\|'
    flags = 'g'
    return RegExp(source, flags)

def load_resources(book, root_name, previous_resources, proceed):
    # Gather the data for root_name plus every resource reachable from it
    # through link placeholders (see create_link_pat()), one resource at a
    # time, and finally call proceed() with a map of name -> [data, mimetype].
    #
    # Entries found in previous_resources are reused instead of being fetched
    # again; previous_resources is emptied once loading has finished. If the
    # root file is flagged has_maths in the manifest, the MathJax files are
    # added via load_mathjax() before proceed() is called.
    ans = Object.create(None)
    pending_resources = v'[root_name]'
    link_pat = create_link_pat(book)

    def do_one():
        # Process the next queued name; an empty queue means we are done.
        name = pending_resources.shift()
        if not name:
            for k in previous_resources:
                v'delete previous_resources[k]'
            if book.manifest.files[root_name].has_maths:
                return load_mathjax(book, ans, proceed)
            return proceed(ans)
        if ans[name]:
            # Already loaded (the same name can be linked from several places)
            return setTimeout(do_one, 0)
        if previous_resources[name]:
            ans[name] = data = previous_resources[name]
            if jstype(data[0]) is 'string':
                find_virtualized_resources(data[0])
            return setTimeout(do_one, 0)
        ui_operations.get_file(book, name, got_one)

    def got_one(data, name, mimetype):
        # Callback for ui_operations.get_file()
        ans[name] = v'[data, mimetype]'
        if jstype(data) is 'string' and book.manifest.files[name]?.is_virtualized:
            find_virtualized_resources(data)
        return setTimeout(do_one, 0)

    def find_virtualized_resources(text):
        # Queue every resource referenced from text that is not queued yet.
        # pending_resources holds plain name strings, so the set of known
        # names is seeded directly from it (reading a .name property off
        # those strings would only ever produce undefined).
        known = set(pending_resources)
        link_pat.lastIndex = 0
        while True:
            m = link_pat.exec(text)
            if not m:
                break
            name = decode_url(m[1])[0]
            if name in known:
                continue
            known.add(name)
            pending_resources.push(name)

    do_one()

# Cache for the data delivered by ui_operations.get_mathjax_files(); filled in
# by load_mathjax() the first time it runs and reused on later calls.
mathjax_data = None

def load_mathjax(book, resource_data, proceed):
    # Store the MathJax files under the '..mathjax-files..' key of
    # resource_data and then call proceed(resource_data). The files are
    # requested from the UI only while the module level cache is still None.
    def deliver(files):
        resource_data['..mathjax-files..'] = files
        proceed(resource_data)

    def on_fetched(data):
        nonlocal mathjax_data
        mathjax_data = data
        deliver(data)

    if mathjax_data is not None:
        deliver(mathjax_data)
    else:
        ui_operations.get_mathjax_files(on_fetched)

def finalize_resources(book, root_name, resource_data):
    # Convert the loaded resources into blob: URLs, replacing the link
    # placeholders inside textual resources with the blob URLs of the
    # resources they point to.
    #
    # Returns (root_data, mathjax, blob_url_map) where root_data is the result
    # of JSON.parse() on the text of root_name, mathjax is whatever was stored
    # under the '..mathjax-files..' key and blob_url_map maps resource names
    # to blob URLs. Note that resource_data is modified: entries are deleted
    # from it as they are processed.
    blob_url_map = Object.create(None)
    root_data = None
    link_pat = create_link_pat(book)
    # Take the MathJax entry out so that the loops below only see [data, mimetype] pairs
    mathjax = resource_data['..mathjax-files..']
    v'delete resource_data["..mathjax-files.."]'

    # Resolve the non virtualized resources immediately
    for name in resource_data:
        data, mimetype = resource_data[name]
        if jstype(data) is not 'string':
            blob_url_map[name] = window.URL.createObjectURL(data)
    for name in blob_url_map:
        v'delete resource_data[name]'

    def add_virtualized_resource(name, text, mimetype):
        # The root resource is parsed as JSON and returned to the caller,
        # every other resource becomes a Blob addressed by a blob URL.
        nonlocal root_data
        if name is root_name:
            root_data = JSON.parse(text)
        else:
            blob_url_map[name] = window.URL.createObjectURL(Blob([text], {'type': mimetype}))

    def replace_deps(text):
        # Replace every placeholder in text whose target already has a blob
        # URL. Returns the set of target names that could not be replaced
        # and the (possibly) modified text.
        replacements = v'[]'
        unresolved_deps = set()
        link_pat.lastIndex = 0
        while True:
            m = link_pat.exec(text)
            if not m:
                break
            dname, frag = decode_url(m[1])
            if blob_url_map[dname]:
                rtext = blob_url_map[dname]
                if frag:
                    rtext += '#' + frag
                replacements.push(v'[m.index, m[0].length, rtext]')
            else:
                unresolved_deps.add(dname)
        # Apply the replacements back to front so that the recorded indices
        # of the earlier matches stay valid
        for index, sz, repl in reversed(replacements):
            text = text[:index] + repl + text[index + sz:]
        return unresolved_deps, text

    # name -> set of dependencies that were unresolved the last time the
    # resource was scanned by replace_deps()
    unresolved_deps_map = {}

    def has_unresolvable_deps(name):
        # True if any dependency recorded for name still lacks a blob URL,
        # in which case scanning the resource again is pointless
        deps = unresolved_deps_map[name]
        if not deps or not deps.length:
            return False
        for x in deps:
            if not blob_url_map[x]:
                return True
        return False

    # Keep making passes over the remaining resources, finalizing those whose
    # dependencies all have blob URLs, until nothing is left or a pass makes
    # no progress
    while True:
        resolved = v'[]'
        num = 0
        for name in resource_data:
            if not blob_url_map[name]:
                num += 1
                text, mimetype = resource_data[name]
                if not has_unresolvable_deps(name):
                    unresolved_deps, text = replace_deps(text)
                    unresolved_deps_map[name] = unresolved_deps
                    if not unresolved_deps.length:
                        add_virtualized_resource(name, text, mimetype)
                        resolved.push(name)
        if not num:
            break
        if not resolved.length:
            unresolved = [name for name in resource_data if not blob_url_map[name]]
            print('ERROR: Could not resolve all dependencies of {} because of a cyclic dependency. Remaining deps: {}'.format(root_name, unresolved))
            # Add the items anyway, without resolving remaining deps
            for name in resource_data:
                if not blob_url_map[name]:
                    text, mimetype = resource_data[name]
                    text = replace_deps(text)[1]
                    add_virtualized_resource(name, text, mimetype)
            break
        # Drop what was finalized in this pass (this is also what removes the
        # root resource, which never gets an entry in blob_url_map)
        for name in resolved:
            v'delete resource_data[name]'

    return root_data, mathjax, blob_url_map

# Lookup table of script MIME types; process_stack() assigns the text of
# <script> tags with one of these types via elem.text
js_types = {k: True for k in 'text/javascript text/ecmascript application/javascript application/ecmascript'.split(' ')}
# Tag name -> name of the attribute that process_stack() looks for to decide
# whether the tag loads an external resource
resource_tag_names = {'script':'src', 'link':'href', 'img':'src', 'image':'xlink:href'}
# Namespace URI -> prefix (without the trailing colon); get_prefix() adds
# entries for namespaces that are not listed here
ns_rmap = {'http://www.w3.org/2000/svg':'svg', 'http://www.w3.org/1999/xlink':'xlink', 'http://www.w3.org/1998/Math/MathML':'math', 'http://www.w3.org/XML/1998/namespace': 'xml', 'http://www.idpf.org/2007/ops': 'epub'}
# Counter used by get_prefix() to generate the prefixes ns0, ns1, ...
ns_count = 0
# When True apply_attributes() skips un-namespaced title and alt attributes;
# set from opts.hide_tooltips at the start of unserialize_html()
hide_tooltips = False

def get_prefix(ns):
    # Return the prefix (including the trailing colon) registered for the
    # namespace ns, registering a freshly generated nsN prefix if needed.
    nonlocal ns_count
    prefix = ns_rmap[ns]
    if prefix:
        return prefix + ':'
    prefix = 'ns' + ns_count
    ns_rmap[ns] = prefix
    ns_count += 1
    return prefix + ':'

def apply_attributes(src, elem, ns_map):
    # Copy the serialized attributes src.a onto the DOM element elem. Every
    # attribute is an array of the form [name, value, namespace key], the
    # namespace key being looked up in ns_map when it is truthy.
    attrs = src.a
    if attrs:
        for attr in attrs:
            ns_key = attr[2]
            if not ns_key:
                plain_name = attr[0]
                # title/alt are dropped when tooltips are being hidden
                suppressed = hide_tooltips and (plain_name is 'title' or plain_name is 'alt')
                if not suppressed:
                    elem.setAttribute(plain_name, attr[1])
                continue
            ns = ns_map[ns_key]
            qualified_name = get_prefix(ns) + attr[0]
            elem.setAttributeNS(ns, qualified_name, attr[1])


def is_loadable_link(attributes):
    # True if any rel attribute (name compared case insensitively) lists the
    # token "stylesheet" among its space separated values.
    for attr in attributes:
        if attr[0].toLowerCase() is not 'rel' or not attr[1]:
            continue
        for token in attr[1].split(' '):
            if token.toLowerCase() is 'stylesheet':
                return True
    return False


def process_stack(stack, tag_map, ns_map, load_required, onload):
    # Build DOM nodes from serialized nodes without recursion. stack holds
    # [node, parent element] pairs and is consumed until it is empty.
    #
    # With a tag_map (legacy format) a node is an array whose first item is
    # the key of its description in tag_map and whose remaining items are its
    # children. Without one, the node is its own description, its children
    # are in node.c and its id comes from the process_stack.tag_id counter.
    #
    # Fields read from a description: n (tag name), s (key into ns_map),
    # a (attributes), x (text inside the element), l (text appended to the
    # parent after the element).
    #
    # The ids of elements that load an external resource are added to
    # load_required and onload, bound to the id, is registered for their load
    # and error events.
    while stack.length:
        node, parent = stack.pop()
        if tag_map:
            tag_id = node[0]
            src = tag_map[tag_id]
        else:
            src = node
            tag_id = v'process_stack.tag_id++'
        if src.s:
            if src.n:
                elem = document.createElementNS(ns_map[src.s], src.n)
            else:
                # Namespaced entry without a tag name: only its trailing text is kept
                if src.l:
                    parent.appendChild(document.createTextNode(src.l))
                continue
        else:
            elem = document.createElement(src.n)
        loadable = False
        attr = resource_tag_names[src.n]
        if attr:
            # Serialized attribute names carry no prefix, so xlink:href is
            # compared as plain href
            if attr.indexOf(':') != -1:
                attr = attr.replace('xlink:', '')
            if src.a:
                for a in src.a:
                    if a[0] is attr:
                        # <link> tags count only when they are stylesheets
                        loadable = is_loadable_link(src.a) if src.n is 'link' else True
                        break
        if loadable:
            load_required.add(tag_id)
            # onload receives the tag id as its this value
            load_callback = onload.bind(tag_id)
            elem.addEventListener('load', load_callback)
            elem.addEventListener('error', load_callback)

        apply_attributes(src, elem, ns_map)
        parent.appendChild(elem)
        if src.x:
            # Scripts with a JavaScript type get their source via the text
            # property, everything else gets an ordinary text node
            if src.n is 'script' and js_types[(elem.getAttribute('type') or 'text/javascript').toLowerCase()] is True:
                elem.text = src.x
            else:
                elem.appendChild(document.createTextNode(src.x))
        if src.l:
            parent.appendChild(document.createTextNode(src.l))
        # Children are pushed last to first so that they are popped, and
        # therefore appended to elem, in their original order
        if tag_map:
            for v'var i = node.length - 1; i >= 1; i--':  # noqa: unused-local
                stack.push(v'[node[i], elem]')
        elif node.c:
            for v'var i = node.c.length; i-- > 0;':  # noqa: unused-local
                stack.push(v'[node.c[i], elem]')


def unserialize_html(serialized_data, proceed, postprocess_dom, root_name):
    # Replace the contents of the current document with the DOM described by
    # serialized_data and call proceed() once all elements that load external
    # resources have fired load/error, or after hang_timeout seconds,
    # whichever comes first. Data that has a tag_map is handed over to
    # unserialize_html_legacy(). postprocess_dom, if given, is called after
    # the DOM has been built and before DOMContentLoaded is dispatched.
    nonlocal hide_tooltips
    hide_tooltips = opts.hide_tooltips
    if serialized_data.tag_map:
        return unserialize_html_legacy(serialized_data, proceed, postprocess_dom, root_name)
    html = serialized_data.tree
    ns_map = serialized_data.ns_map
    # Start from a clean document: no attributes on <html>, <head> and <body>
    # and no children in <head> and <body>
    remove_all_attributes(document.documentElement, document.head, document.body)
    clear(document.head, document.body)
    apply_attributes(html, document.documentElement, ns_map)
    # hide browser scrollbars while loading since they will anyway be hidden
    # after loading and this prevents extra layouts
    document.head.appendChild(
        E.style(type='text/css', 'html::-webkit-scrollbar, body::-webkit-scrollbar { display: none !important }')
    )
    if runtime.is_standalone_viewer and root_name:
        # For files inside a sub-directory add a <base> tag built from the
        # directory part of the current location plus root_name
        if root_name.indexOf('/') > -1:
            base = window.location.pathname.rpartition('/')[0]
            base = f'{window.location.protocol}//{window.location.hostname}{base}/' + root_name
            document.head.appendChild(E.base(href=base))


    # Default stylesheet
    if not runtime.is_standalone_viewer:
        # for the standalone viewer the default font family is set
        # in the viewer settings
        document.head.appendChild(E.style(type='text/css', 'html {{ font-family: {} }}'.format(window.default_font_family or "sans-serif")))

    # Ids of the elements whose load/error event is still outstanding
    load_required = set()
    # Guards against proceed() being called more than once
    proceeded = False
    hang_timeout = 5

    def hangcheck():
        # Timer callback: stop waiting for resources that have not loaded
        nonlocal proceeded
        if not proceeded:
            proceeded = True
            print(f'WARNING: All resources did not load in {hang_timeout} seconds, proceeding anyway ({load_required.length} resources left)')
            proceed()

    def onload():
        # load/error handler; process_stack() binds this to the id of the element
        nonlocal proceeded
        load_required.discard(this)
        if not load_required.length and not proceeded:
            proceeded = True
            proceed()

    def process_children(node, parent):
        # Build all children of node below parent, leaving out direct
        # <meta> and <base> children
        if not node.c:
            return
        stack = v'[]'
        for v'var i = node.c.length; i-- > 0;':  # noqa: unused-local
            child = v'node.c[i]'
            if child.n is not 'meta' and child.n is not 'base':
                stack.push(v'[child, parent]')
        process_stack(stack, None, ns_map, load_required, onload)


    body_done = False
    # Reset the id counter process_stack() uses when there is no tag_map
    process_stack.tag_id = 1
    for child in html.c:
        if child.n is 'head':
            process_children(child, document.head)
        elif child.n is 'body':
            if not document.body:
                document.documentElement.appendChild(document.createElement('body'))
            # Only the first <body> contributes attributes, the children of
            # any further <body> are still appended to document.body
            if not body_done:
                body_done = True
                apply_attributes(child, document.body, ns_map)
            if child.x:
                document.body.appendChild(document.createTextNode(child.x))
            process_children(child, document.body)

    if postprocess_dom:
        postprocess_dom()
    # The document was assembled by hand, so dispatch DOMContentLoaded ourselves
    ev = document.createEvent('Event')
    ev.initEvent('DOMContentLoaded', True, True)
    document.dispatchEvent(ev)
    if load_required.length:
        setTimeout(hangcheck, hang_timeout * 1000)
    else:
        proceeded = True
        proceed()


def unserialize_html_legacy(serialized_data, proceed, postprocess_dom, root_name):
    # Variant of unserialize_html() for data in the tag_map format: tree is
    # made of nested arrays whose first item is a key into tag_map and whose
    # remaining items are child nodes. tree[1] and tree[2] are used as the
    # <head> and <body> nodes respectively.
    tag_map = serialized_data.tag_map
    tree = serialized_data.tree
    ns_map = serialized_data.ns_map
    # Entry 0 of tag_map supplies the attributes for the root element
    html = tag_map[0]
    remove_all_attributes(document.documentElement)
    apply_attributes(html, document.documentElement, ns_map)
    head, body = tree[1], tree[2]  # noqa: unused-local
    clear(document.head, document.body)
    remove_all_attributes(document.head, document.body)
    # hide browser scrollbars while loading since they will anyway be hidden
    # after loading and this prevents extra layouts
    document.head.appendChild(
        E.style(type='text/css', 'html::-webkit-scrollbar, body::-webkit-scrollbar { display: none !important }')
    )
    if runtime.is_standalone_viewer and root_name:
        # For files inside a sub-directory add a <base> tag built from the
        # directory part of the current location plus root_name
        if root_name.indexOf('/') > -1:
            base = window.location.pathname.rpartition('/')[0]
            base = f'{window.location.protocol}//{window.location.hostname}{base}/' + root_name
            document.head.appendChild(E.base(href=base))


    # Default stylesheet
    if not runtime.is_standalone_viewer:
        # for the standalone viewer the default font family is set
        # in the viewer settings
        document.head.appendChild(E.style(type='text/css', 'html {{ font-family: {} }}'.format(window.default_font_family or "sans-serif")))
    # Ids of the elements whose load/error event is still outstanding
    load_required = set()
    # Guards against proceed() being called more than once
    proceeded = False
    hang_timeout = 5

    def hangcheck():
        # Timer callback: stop waiting for resources that have not loaded
        nonlocal proceeded
        if not proceeded:
            proceeded = True
            print(f'WARNING: All resources did not load in {hang_timeout} seconds, proceeding anyway ({load_required.length} resources left)')
            proceed()

    def onload():
        # load/error handler; process_stack() binds this to the id of the element
        nonlocal proceeded
        load_required.discard(this)
        if not load_required.length and not proceeded:
            proceeded = True
            proceed()

    # Children start at index 1 (index 0 is the tag_map key) and are pushed
    # last to first so that process_stack() pops them in their original order
    stack = v'[]'
    for v'var i = head.length - 1; i >= 1; i--':
        stack.push(v'[head[i], document.head]')
    process_stack(stack, tag_map, ns_map, load_required, onload)
    bnode = tag_map[body[0]]
    apply_attributes(bnode, document.body, ns_map)
    if bnode.x:
        document.body.appendChild(document.createTextNode(bnode.x))
    # The stack was emptied by process_stack() above, so it can be reused
    for v'var i = body.length - 1; i >= 1; i--':  # noqa: unused-local
        stack.push(v'[body[i], document.body]')
    process_stack(stack, tag_map, ns_map, load_required, onload)
    if postprocess_dom:
        postprocess_dom()
    # The document was assembled by hand, so dispatch DOMContentLoaded ourselves
    ev = document.createEvent('Event')
    ev.initEvent('DOMContentLoaded', True, True)
    document.dispatchEvent(ev)
    if load_required.length:
        setTimeout(hangcheck, hang_timeout * 1000)
    else:
        proceeded = True
        proceed()


def text_from_serialized_html(data, get_anchor_offset_map):
    # Extract the text of the <body> of a serialized HTML file. data is the
    # JSON text of the serialized file, in either the tag_map (legacy) or the
    # nested tree format.
    #
    # Returns the text as a single string. If get_anchor_offset_map is truthy
    # the return value is instead (text, anchor_offset_map) with
    # anchor_offset_map mapping the value of every id attribute to the offset
    # in the text at which its element was encountered (the first occurrence
    # of an id wins).
    serialized_data = JSON.parse(data)
    tag_map = serialized_data.tag_map
    ans = v'[]'
    # Tags that are skipped together with everything below them
    no_visit = {'script': True, 'style': True, 'title': True, 'head': True}
    # Tags that are traversed but contribute no text, neither their own nor
    # that of their descendants
    ignore_text = {'img': True, 'math': True, 'rt': True, 'rp': True, 'rtc': True}
    # ignore_text is otherwise used only inside a verbatim JS expression
    # below; this bare reference presumably exists to stop it being reported
    # as unused -- TODO confirm
    ignore_text
    # Stack entries are [node or tail text string, text ignored in parent]
    if tag_map:
        stack = v'[[serialized_data.tree[2], false]]'
    else:
        stack = v'[]'
        for child in serialized_data.tree.c:
            if child.n is 'body':
                stack.push(v'[child, false]')
    anchor_offset_map = {}
    # Length of the text collected so far
    text_pos = 0
    while stack.length:
        node, text_ignored_in_parent = stack.pop()
        if jstype(node) is 'string':
            # Tail text that was pushed while visiting the element it follows
            ans.push(node)
            text_pos += node.length
            continue
        src = tag_map[node[0]] if tag_map else node
        if get_anchor_offset_map and src.a:
            for v'var i = 0; i < src.a.length; i++':
                x = src.a[i]
                if x[0] is 'id':
                    aid = x[1]
                    if jstype(anchor_offset_map[aid]) is not 'number':
                        anchor_offset_map[aid] = text_pos
        if no_visit[src.n]:
            continue
        ignore_text_in_node_and_children = text_ignored_in_parent or v'!!ignore_text[src.n]'
        if not ignore_text_in_node_and_children and src.x:
            ans.push(src.x)
            text_pos += src.x.length
        # The tail text is pushed before the children so that it is popped,
        # and hence emitted, after all of them
        if not text_ignored_in_parent and src.l:
            stack.push(v'[src.l, ignore_text_in_node_and_children]')
        # Children are pushed last to first so that they are visited in
        # their original order
        if tag_map:
            for v'var i = node.length - 1; i >= 1; i--':
                stack.push(v'[node[i], ignore_text_in_node_and_children]')
        else:
            if src.c:
                for v'var i = src.c.length; i-- > 0;':
                    stack.push(v'[src.c[i], ignore_text_in_node_and_children]')
    ans = ans.join('')
    if get_anchor_offset_map:
        return ans, anchor_offset_map
    return ans