class StylizerRules(object):
    """Flattened CSS rules computed once from an ordered list of stylesheets.

    Building an instance walks every stylesheet and fills three attributes:

    * ``rules``: list of ``(specificity, selector, style, text, href)``
      tuples, sorted by ``specificity``;
    * ``page_rule``: dict merged from the style of every page rule seen;
    * ``font_face_rules``: the font-face rule objects that were kept.

    ``same_rules()`` tells a caller whether an existing instance was built
    from the same ``opts``, ``profile`` and stylesheets and can be reused
    instead of being generated again.
    """

    def __init__(self, opts, profile, stylesheets):
        """Flatten all rules of ``stylesheets``.

        :param opts: options object; ``change_justification`` is read from it.
        :param profile: profile object; ``fnames`` and ``fbase`` are read
            from it when converting named font sizes.
        :param stylesheets: iterable of parsed stylesheets exposing ``href``
            and ``cssRules``. The first one is treated as the user agent
            sheet.
        """
        self.opts, self.profile = opts, profile
        # Keep a private snapshot: this sequence is the key same_rules()
        # compares against, so it must not change if the caller later
        # mutates the list it passed in.
        self.stylesheets = tuple(stylesheets)

        self.rules = []
        self.page_rule = {}
        self.font_face_rules = []
        index = 0
        for sheet_index, stylesheet in enumerate(self.stylesheets):
            href = stylesheet.href
            is_user_agent_sheet = sheet_index == 0
            for rule in stylesheet.cssRules:
                if rule.type == rule.MEDIA_RULE:
                    if not media_ok(rule.media.mediaText):
                        # Media query does not apply: drop the whole group.
                        continue
                    candidates = rule.cssRules
                else:
                    candidates = (rule,)
                for candidate in candidates:
                    self.rules.extend(self.flatten_rule(
                        candidate, href, index,
                        is_user_agent_sheet=is_user_agent_sheet))
                    index += 1
        self.rules.sort(key=itemgetter(0))  # sort by specificity

    def flatten_rule(self, rule, href, index, is_user_agent_sheet=False):
        """Return the flattened entries for a single CSS rule.

        Style rules yield one ``(specificity, selector, style, text, href)``
        tuple per selector. Page rules and font-face rules yield nothing;
        they are recorded on ``self.page_rule`` / ``self.font_face_rules``
        instead.

        :param rule: the rule object to flatten.
        :param href: href of the stylesheet the rule came from.
        :param index: running position of the rule, used as the last
            component of the specificity tuple.
        :param is_user_agent_sheet: when true the specificity tuple starts
            with 0 instead of 1.
        """
        results = []
        sheet_index = 0 if is_user_agent_sheet else 1
        if isinstance(rule, CSSStyleRule):
            style = self.flatten_style(rule.style)
            for selector in rule.selectorList:
                specificity = (sheet_index,) + selector.specificity + (index,)
                results.append((specificity, list(selector.seq), style,
                                selector.selectorText, href))
        elif isinstance(rule, CSSPageRule):
            self.page_rule.update(self.flatten_style(rule.style))
        elif isinstance(rule, CSSFontFaceRule):
            if rule.style.length > 1:
                # Ignore the meaningless font face rules generated by the
                # benighted MS Word that contain only a font-family declaration
                # and nothing else
                self.font_face_rules.append(rule)
        return results

    def flatten_style(self, cssstyle):
        """Convert a CSS style declaration into a plain ``name -> value`` dict.

        Properties that have an entry in ``normalizers`` are expanded through
        it, ``text-align`` goes through ``_apply_text_align`` and everything
        else is copied as is. Afterwards a named ``font-size`` is rewritten
        in ``rem`` units using the profile, and ``-epub-writing-mode`` is
        copied to ``-webkit-writing-mode`` and ``writing-mode`` when those
        are not already set.
        """
        style = {}
        for prop in cssstyle:
            name = prop.name
            normalizer = normalizers.get(name, None)
            if normalizer is not None:
                style.update(normalizer(name, prop.cssValue))
            elif name == 'text-align':
                style['text-align'] = self._apply_text_align(prop.value)
            else:
                style[name] = prop.value
        if 'font-size' in style:
            size = style['font-size']
            # Map the two non-standard names onto standard ones first.
            if size == 'normal':
                size = 'medium'
            elif size == 'smallest':
                size = 'xx-small'
            if size in FONT_SIZE_NAMES:
                style['font-size'] = "%.1frem" % (self.profile.fnames[size] / float(self.profile.fbase))
        if '-epub-writing-mode' in style:
            for x in ('-webkit-writing-mode', 'writing-mode'):
                style[x] = style.get(x, style['-epub-writing-mode'])
        return style

    def _apply_text_align(self, text):
        """Return ``text``, replaced by ``opts.change_justification`` when
        both are one of ``'left'`` / ``'justify'``."""
        choices = ('left', 'justify')
        if text in choices and self.opts.change_justification in choices:
            return self.opts.change_justification
        return text

    def same_rules(self, opts, profile, stylesheets):
        """Return True if these rules were built from the same inputs.

        ``opts`` and ``profile`` are compared with ``!=``; ``stylesheets``
        must have the same length and compare equal element by element, in
        order, to the stylesheets captured at construction time.
        """
        if self.opts != opts:
            # it's unlikely to happen, but better safe than sorry
            return False
        if self.profile != profile:
            return False
        candidate = tuple(stylesheets)
        if len(self.stylesheets) != len(candidate):
            return False
        for mine, theirs in zip(self.stylesheets, candidate):
            if mine != theirs:
                return False
        return True
- if (not hasattr(self.profile, 'stylizer_rules')) \ - or set(self.profile.stylizer_rules.stylesheets) != set(stylesheets): - self.profile.stylizer_rules = StylizerRules(self.opts, self.profile, stylesheets) + # using oeb to store the rules, page rule and font face rules + # and generating them again if opts, profile or stylesheets are different + if (not hasattr(self.oeb, 'stylizer_rules')) \ + or not self.oeb.stylizer_rules.same_rules(self.opts, self.profile, stylesheets): + self.oeb.stylizer_rules = StylizerRules(self.opts, self.profile, stylesheets) self._styles = {} pseudo_pat = re.compile(u':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I) @@ -350,18 +361,18 @@ class Stylizer(object): @property def rules(self): - return self.profile.stylizer_rules.rules + return self.oeb.stylizer_rules.rules @property def page_rule(self): - return self.profile.stylizer_rules.page_rule + return self.oeb.stylizer_rules.page_rule @property def font_face_rules(self): - return self.profile.stylizer_rules.font_face_rules + return self.oeb.stylizer_rules.font_face_rules def flatten_style(self, cssstyle): - return self.profile.stylizer_rules.flatten_style(cssstyle) + return self.oeb.stylizer_rules.flatten_style(cssstyle) def _fetch_css_file(self, path): hrefs = self.oeb.manifest.hrefs From a2418620bfbcd08d0573eaac109cf1c213469752 Mon Sep 17 00:00:00 2001 From: Claire Date: Mon, 6 May 2019 22:45:29 +0200 Subject: [PATCH 3/3] Store a copy of rules in stylizer --- src/calibre/ebooks/oeb/stylizer.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index 0c15ca3032..9a5e8908d0 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -289,6 +289,10 @@ class Stylizer(object): if (not hasattr(self.oeb, 'stylizer_rules')) \ or not self.oeb.stylizer_rules.same_rules(self.opts, self.profile, stylesheets): self.oeb.stylizer_rules = 
StylizerRules(self.opts, self.profile, stylesheets) + self.rules = self.oeb.stylizer_rules.rules + self.page_rule = self.oeb.stylizer_rules.page_rule + self.font_face_rules = self.oeb.stylizer_rules.font_face_rules + self.flatten_style = self.oeb.stylizer_rules.flatten_style self._styles = {} pseudo_pat = re.compile(u':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I) @@ -359,21 +363,6 @@ class Stylizer(object): if upd: style._update_cssdict(upd) - @property - def rules(self): - return self.oeb.stylizer_rules.rules - - @property - def page_rule(self): - return self.oeb.stylizer_rules.page_rule - - @property - def font_face_rules(self): - return self.oeb.stylizer_rules.font_face_rules - - def flatten_style(self, cssstyle): - return self.oeb.stylizer_rules.flatten_style(cssstyle) - def _fetch_css_file(self, path): hrefs = self.oeb.manifest.hrefs if path not in hrefs: