From a62bd36fe448a3512ed59f0c01096944873c01a0 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 6 Apr 2021 13:20:15 +0530 Subject: [PATCH] E-book viewer: Speed up first time open for EPUB files with lots of styling The transformation of CSS is now in native code and an order of magnitude faster. For typical novel length books the total time for first load processing has been halved. --- src/calibre/srv/render_book.py | 154 +++++++++++---------------------- 1 file changed, 51 insertions(+), 103 deletions(-) diff --git a/src/calibre/srv/render_book.py b/src/calibre/srv/render_book.py index 5cb5a707eb..60466d8830 100644 --- a/src/calibre/srv/render_book.py +++ b/src/calibre/srv/render_book.py @@ -12,17 +12,12 @@ from collections import defaultdict from datetime import datetime from functools import partial from itertools import count -from math import ceil - -from css_parser import replaceUrls -from css_parser.css import CSSRule from lxml.etree import Comment +from math import ceil from calibre import detect_ncpus, force_unicode, prepare_string_for_xml from calibre.constants import iswindows from calibre.customize.ui import plugin_for_input_format -from calibre.ebooks import parse_css_length -from calibre.ebooks.css_transform_rules import StyleDeclaration from calibre.ebooks.oeb.base import ( OEB_DOCS, OEB_STYLES, OPF, XHTML, XHTML_NS, XLINK, XPath as _XPath, rewrite_links, urlunquote @@ -32,7 +27,7 @@ from calibre.ebooks.oeb.polish.container import Container as ContainerBase from calibre.ebooks.oeb.polish.cover import ( find_cover_image, find_cover_image_in_page, find_cover_page ) -from calibre.ebooks.oeb.polish.css import transform_inline_styles +from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc from calibre.ebooks.oeb.polish.utils import guess_type from calibre.ptempfile import PersistentTemporaryDirectory @@ -46,13 +41,14 @@ from calibre.utils.serialize import ( json_dumps, json_loads, msgpack_dumps, msgpack_loads ) from calibre.utils.short_uuid import uuid4 +from calibre_extensions import speedup +from calibre_extensions.fast_css_transform import transform_properties from polyglot.binary import ( as_base64_unicode as encode_component, from_base64_bytes, from_base64_unicode as decode_component ) from polyglot.builtins import as_bytes, iteritems, map, unicode_type from polyglot.urllib import quote, urlparse -from calibre_extensions import speedup RENDER_VERSION = 1 @@ -85,19 +81,6 @@ def decode_url(x): return decode_component(parts[0]), (parts[1] if len(parts) > 1 else '') -absolute_units = frozenset('px mm cm pt in pc q'.split()) -length_factors = {'mm':2.8346456693, 'cm':28.346456693, 'in': 72, 'pc': 12, 'q':0.708661417325} - - -def convert_fontsize(length, unit, base_font_size=16.0, dpi=96.0): - ' Convert font size to rem so that font size scaling works. Assumes the document has the specified base font size in px ' - if unit == 'px': - return length/base_font_size - pt_to_px = dpi / 72.0 - pt_to_rem = pt_to_px / base_font_size - return length * length_factors.get(unit, 1) * pt_to_rem - - def create_link_replacer(container, link_uid, changed): resource_template = link_uid + '|{}|' @@ -134,65 +117,6 @@ def create_link_replacer(container, link_uid, changed): return link_replacer -page_break_properties = ('page-break-before', 'page-break-after', 'page-break-inside') -absolute_font_sizes = { - 'xx-small': '0.5rem', 'x-small': '0.625rem', 'small': '0.8rem', - 'medium': '1rem', - 'large': '1.125rem', 'x-large': '1.5rem', 'xx-large': '2rem', 'xxx-large': '2.55rem' -} -nonstandard_writing_mode_property_names = ('-webkit-writing-mode', '-epub-writing-mode') - - -def transform_declaration(decl): - decl = StyleDeclaration(decl) - changed = False - nonstandard_writing_mode_props = {} - standard_writing_mode_props = {} - - for prop, parent_prop in tuple(decl): - if prop.name in page_break_properties: - changed = True - name = prop.name.partition('-')[2] - for prefix in ('', '-webkit-column-'): - # Note that Firefox does not support break-after at all - # https://bugzil.la/549114 - decl.set_property(prefix + name, prop.value, prop.priority) - decl.remove_property(prop, parent_prop) - elif prop.name == 'font-size': - raw = prop.value - afs = absolute_font_sizes.get(raw) - if afs is not None: - changed = True - decl.change_property(prop, parent_prop, afs) - continue - l, unit = parse_css_length(raw) - if unit in absolute_units: - changed = True - l = convert_fontsize(l, unit) - decl.change_property(prop, parent_prop, unicode_type(l) + 'rem') - elif prop.name in nonstandard_writing_mode_property_names: - nonstandard_writing_mode_props[prop.value] = prop.priority - elif prop.name == 'writing-mode': - standard_writing_mode_props[prop.value] = True - - # Add standard writing-mode properties if they don't exist so that - # all of the browsers supported by the viewer work in vertical modes - for value, priority in nonstandard_writing_mode_props.items(): - if value not in standard_writing_mode_props: - decl.set_property('writing-mode', value, priority) - changed = True - - return changed - - -def transform_sheet(sheet): - changed = False - for rule in sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE): - if transform_declaration(rule.style): - changed = True - return changed - - def check_for_maths(root): for x in root.iterdescendants('{*}math'): return True @@ -361,27 +285,25 @@ def create_cover_page(container, input_fmt, is_comic, book_metadata=None): def transform_style_sheet(container, name, link_uid, virtualize_resources, virtualized_names): changed = False - sheet = container.parsed(name) + link_replacer = None if virtualize_resources: changed_names = set() - link_replacer = create_link_replacer(container, link_uid, changed_names) - replaceUrls(sheet, partial(link_replacer, name)) + link_replacer = partial(create_link_replacer(container, link_uid, changed_names), name) if name in changed_names: changed = True virtualized_names.add(name) - if transform_sheet(sheet): + raw = container.raw_data(name, decode=True) + nraw = transform_properties(raw, is_declaration=False, url_callback=link_replacer) + if nraw != raw: changed = True - if changed: - raw = container.serialize_item(name) - else: - raw = container.raw_data(name, decode=False) + raw = nraw raw = raw.lstrip() - if not raw.startswith(b'@charset'): - raw = b'@charset "UTF-8";\n' + raw + if not raw.startswith('@charset'): + raw = '@charset "UTF-8";\n' + raw changed = True if changed: with container.open(name, 'wb') as f: - f.write(raw) + f.write(raw.encode('utf-8')) def transform_svg_image(container, name, link_uid, virtualize_resources, virtualized_names): @@ -402,6 +324,26 @@ def transform_svg_image(container, name, link_uid, virtualize_resources, virtual container.commit_item(name) +def transform_inline_styles(container, name, transform_sheet, transform_style): + root = container.parsed(name) + changed = False + for style in root.xpath('//*[local-name()="style"]'): + if style.text and (style.get('type') or 'text/css').lower() == 'text/css': + nraw = transform_sheet(style.text) + if nraw != style.text: + changed = True + style.text = nraw + pretty_script_or_style(container, style) + for elem in root.xpath('//*[@style]'): + text = elem.get('style', None) + if text: + ntext = transform_style(text) + if ntext != text: + changed = True + elem.set('style', ntext) + return changed + + def transform_html(container, name, virtualize_resources, link_uid, link_to_map, virtualized_names): link_xpath = XPath('//h:a[@href]') svg_link_xpath = XPath('//svg:a') @@ -425,17 +367,21 @@ def transform_html(container, name, virtualize_resources, link_uid, link_to_map, if ltype != 'text/css' or rel != 'stylesheet': link.attrib.clear() - def transform_and_virtualize_sheet(sheet): - changed = transform_sheet(sheet) - if virtualize_resources: - replaceUrls(sheet, partial(link_replacer, name)) - if name in changed_names: - virtualized_names.add(name) - changed = True - return changed + # URLs in the inline CSS will be replaced in virtualize_html + def transform_sheet(sheet_text): + ans = transform_properties(sheet_text, is_declaration=False) + if name in changed_names: + virtualized_names.add(name) + return ans + + def transform_declaration(decl_text): + ans = transform_properties(decl_text, is_declaration=True) + if name in changed_names: + virtualized_names.add(name) + return ans # Transform