Conversion: Performance improvement for books that have many HTML files that all include the same CSS stylesheets

The conversion engine now flattens a given sequence of stylesheets only once and reuses the result for every HTML file that includes the same sheets.

Merge branch 'master' of https://github.com/princesse-framboise/calibre
2025-07-09 03:04:10 -04:00 · 2019-06-14 10:10:05 +05:30 · 2019-06-14 10:10:05 +05:30 · ca4fd1f381
commit ca4fd1f381
parent b2f299de88 a2418620bf
1 changed files with 96 additions and 70 deletions
--- a/src/calibre/ebooks/oeb/stylizer.py
+++ b/src/calibre/ebooks/oeb/stylizer.py
@ -95,6 +95,90 @@ def test_media_ok():
    assert media_ok('screen, (device-width:10px)')
    assert not media_ok('screen and (device-width:10px)')
 class StylizerRules(object):
    """Flattened CSS rules computed from an ordered sequence of stylesheets.

    Construction walks every sheet once and fills three attributes:
    ``rules`` (tuples sorted by specificity), ``page_rule`` (a dict merged
    from every page rule) and ``font_face_rules`` (the retained font-face
    rule objects). ``same_rules`` tells a caller whether an existing
    instance was built from the same inputs.
    """

    def __init__(self, opts, profile, stylesheets):
        self.opts = opts
        self.profile = profile
        self.stylesheets = stylesheets
        self.rules = []
        self.page_rule = {}
        self.font_face_rules = []
        # Running counter over every flattened rule; it is the last
        # component of the specificity tuple built in flatten_rule.
        counter = 0
        for position, sheet in enumerate(stylesheets):
            # The first sheet in the sequence is flagged as the user agent sheet
            from_user_agent = position == 0
            sheet_href = sheet.href
            for css_rule in sheet.cssRules:
                if css_rule.type != css_rule.MEDIA_RULE:
                    candidates = (css_rule,)
                elif media_ok(css_rule.media.mediaText):
                    # Only the children of an acceptable media rule are used
                    candidates = css_rule.cssRules
                else:
                    continue
                for candidate in candidates:
                    flattened = self.flatten_rule(
                        candidate, sheet_href, counter,
                        is_user_agent_sheet=from_user_agent)
                    self.rules.extend(flattened)
                    counter += 1
        self.rules.sort(key=itemgetter(0))  # sort by specificity

    def flatten_rule(self, rule, href, index, is_user_agent_sheet=False):
        """Flatten a single CSS rule.

        Style rules yield one ``(specificity, selector, style, text, href)``
        tuple per selector. Page rules are merged into ``self.page_rule``
        and font-face rules with more than one declaration are appended to
        ``self.font_face_rules``; for those, and for any other rule type,
        an empty list is returned.
        """
        flattened = []
        if isinstance(rule, CSSStyleRule):
            origin = 0 if is_user_agent_sheet else 1
            declarations = self.flatten_style(rule.style)
            for sel in rule.selectorList:
                weight = (origin,) + sel.specificity + (index,)
                selector_text = sel.selectorText
                parts = list(sel.seq)
                flattened.append((weight, parts, declarations, selector_text, href))
            return flattened
        if isinstance(rule, CSSPageRule):
            self.page_rule.update(self.flatten_style(rule.style))
            return flattened
        if isinstance(rule, CSSFontFaceRule):
            # Ignore the meaningless font face rules generated by the
            # benighted MS Word that contain only a font-family declaration
            # and nothing else
            if rule.style.length > 1:
                self.font_face_rules.append(rule)
        return flattened

    def flatten_style(self, cssstyle):
        """Return a plain dict of property name -> value for *cssstyle*.

        Properties that have an entry in ``normalizers`` are expanded by
        that normalizer, ``text-align`` goes through ``_apply_text_align``,
        named font sizes are rewritten in ``rem`` using the profile, and
        ``-epub-writing-mode`` is copied to its aliases when they are unset.
        """
        flat = {}
        for declaration in cssstyle:
            prop_name = declaration.name
            normalize = normalizers.get(prop_name, None)
            if normalize is None:
                if prop_name == 'text-align':
                    flat['text-align'] = self._apply_text_align(declaration.value)
                else:
                    flat[prop_name] = declaration.value
            else:
                flat.update(normalize(prop_name, declaration.cssValue))
        if 'font-size' in flat:
            keyword = flat['font-size']
            # Map the two alias keywords onto the names checked below
            if keyword == 'normal':
                keyword = 'medium'
            elif keyword == 'smallest':
                keyword = 'xx-small'
            if keyword in FONT_SIZE_NAMES:
                ratio = self.profile.fnames[keyword] / float(self.profile.fbase)
                flat['font-size'] = "%.1frem" % ratio
        if '-epub-writing-mode' in flat:
            fallback = flat['-epub-writing-mode']
            for alias in ('-webkit-writing-mode', 'writing-mode'):
                # An alias that is already present keeps its own value
                flat.setdefault(alias, fallback)
        return flat

    def _apply_text_align(self, text):
        """Return the text-align value to use, honouring change_justification.

        Only ``left`` and ``justify`` are ever replaced, and only when the
        option itself is one of those two values.
        """
        swappable = ('left', 'justify')
        if text not in swappable:
            return text
        wanted = self.opts.change_justification
        return wanted if wanted in swappable else text

    def same_rules(self, opts, profile, stylesheets):
        """Return True when this instance was built from the given inputs.

        The options, the profile, the number of stylesheets and each
        stylesheet (compared position by position) must all match.
        """
        # differing opts are unlikely to happen, but better safe than sorry
        if self.opts != opts or self.profile != profile:
            return False
        known = self.stylesheets
        if len(known) != len(stylesheets):
            return False
        return not any(
            sheet != stylesheets[pos] for pos, sheet in enumerate(known))
 class Stylizer(object):
    STYLESHEETS = WeakKeyDictionary()
@ -133,7 +217,6 @@ class Stylizer(object):
        parser = CSSParser(fetcher=self._fetch_css_file,
                log=logging.getLogger('calibre.css'))
        self.font_face_rules = []
        for elem in style_tags:
            if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES and media_ok(elem.get('media'))):
                text = elem.text if elem.text else u''
@ -200,29 +283,22 @@ class Stylizer(object):
                    self.logger.exception('Failed to parse %s, ignoring.'%w)
                    self.logger.debug('Bad css: ')
                    self.logger.debug(x)
-        rules = []
+
-        index = 0
+        # using oeb to store the rules, page rule and font face rules
-        self.stylesheets = set()
+        # and generating them again if opts, profile or stylesheets are different
-        self.page_rule = {}
+        if (not hasattr(self.oeb, 'stylizer_rules')) \
-        for sheet_index, stylesheet in enumerate(stylesheets):
+            or not self.oeb.stylizer_rules.same_rules(self.opts, self.profile, stylesheets):
-            href = stylesheet.href
+            self.oeb.stylizer_rules = StylizerRules(self.opts, self.profile, stylesheets)
-            self.stylesheets.add(href)
+        self.rules = self.oeb.stylizer_rules.rules
-            for rule in stylesheet.cssRules:
+        self.page_rule = self.oeb.stylizer_rules.page_rule
-                if rule.type == rule.MEDIA_RULE:
+        self.font_face_rules = self.oeb.stylizer_rules.font_face_rules
-                    if media_ok(rule.media.mediaText):
+        self.flatten_style = self.oeb.stylizer_rules.flatten_style
-                        for subrule in rule.cssRules:
+
                            rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0))
                            index += 1
                else:
                    rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0))
                    index = index + 1
        rules.sort(key=itemgetter(0))  # sort by specificity
        self.rules = rules
        self._styles = {}
        pseudo_pat = re.compile(u':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
        select = Select(tree, ignore_inappropriate_pseudo_classes=True)
-        for _, _, cssdict, text, _ in rules:
+        for _, _, cssdict, text, _ in self.rules:
            fl = pseudo_pat.search(text)
            try:
                matches = tuple(select(text))
@ -301,56 +377,6 @@ class Stylizer(object):
            data = data.encode('utf-8')
        return ('utf-8', data)
    def flatten_rule(self, rule, href, index, is_user_agent_sheet=False):
        """Flatten a single CSS rule.

        Style rules yield one ``(specificity, selector, style, text, href)``
        tuple per selector. Page rules are merged into ``self.page_rule``
        and font-face rules with more than one declaration are appended to
        ``self.font_face_rules``; for those, and for any other rule type,
        an empty list is returned.
        """
        flattened = []
        if isinstance(rule, CSSStyleRule):
            origin = 0 if is_user_agent_sheet else 1
            declarations = self.flatten_style(rule.style)
            for sel in rule.selectorList:
                weight = (origin,) + sel.specificity + (index,)
                selector_text = sel.selectorText
                parts = list(sel.seq)
                flattened.append((weight, parts, declarations, selector_text, href))
            return flattened
        if isinstance(rule, CSSPageRule):
            self.page_rule.update(self.flatten_style(rule.style))
            return flattened
        if isinstance(rule, CSSFontFaceRule):
            # Ignore the meaningless font face rules generated by the
            # benighted MS Word that contain only a font-family declaration
            # and nothing else
            if rule.style.length > 1:
                self.font_face_rules.append(rule)
        return flattened
    def flatten_style(self, cssstyle):
        """Return a plain dict of property name -> value for *cssstyle*.

        Properties that have an entry in ``normalizers`` are expanded by
        that normalizer, ``text-align`` goes through ``_apply_text_align``,
        named font sizes are rewritten in ``rem`` using the profile, and
        ``-epub-writing-mode`` is copied to its aliases when they are unset.
        """
        flat = {}
        for declaration in cssstyle:
            prop_name = declaration.name
            normalize = normalizers.get(prop_name, None)
            if normalize is None:
                if prop_name == 'text-align':
                    flat['text-align'] = self._apply_text_align(declaration.value)
                else:
                    flat[prop_name] = declaration.value
            else:
                flat.update(normalize(prop_name, declaration.cssValue))
        if 'font-size' in flat:
            keyword = flat['font-size']
            # Map the two alias keywords onto the names checked below
            if keyword == 'normal':
                keyword = 'medium'
            elif keyword == 'smallest':
                keyword = 'xx-small'
            if keyword in FONT_SIZE_NAMES:
                ratio = self.profile.fnames[keyword] / float(self.profile.fbase)
                flat['font-size'] = "%.1frem" % ratio
        if '-epub-writing-mode' in flat:
            fallback = flat['-epub-writing-mode']
            for alias in ('-webkit-writing-mode', 'writing-mode'):
                # An alias that is already present keeps its own value
                flat.setdefault(alias, fallback)
        return flat
    def _apply_text_align(self, text):
        if text in ('left', 'justify') and self.opts.change_justification in ('left', 'justify'):
            text = self.opts.change_justification
        return text
    def style(self, element):
        try:
            return self._styles[element]