Conversion: Performance improvement for books that have many HTML files that all include the same CSS stylesheets

Now the conversion engine will flatten the same sequence of sheets
only once.

Merge branch 'master' of https://github.com/princesse-framboise/calibre
This commit is contained in:
Kovid Goyal 2019-06-14 10:10:05 +05:30
commit ca4fd1f381
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -95,6 +95,90 @@ def test_media_ok():
assert media_ok('screen, (device-width:10px)')
assert not media_ok('screen and (device-width:10px)')
class StylizerRules(object):
def __init__(self, opts, profile, stylesheets):
self.opts, self.profile, self.stylesheets = opts, profile, stylesheets
index = 0
self.rules = []
self.page_rule = {}
self.font_face_rules = []
for sheet_index, stylesheet in enumerate(stylesheets):
href = stylesheet.href
for rule in stylesheet.cssRules:
if rule.type == rule.MEDIA_RULE:
if media_ok(rule.media.mediaText):
for subrule in rule.cssRules:
self.rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0))
index += 1
else:
self.rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0))
index = index + 1
self.rules.sort(key=itemgetter(0)) # sort by specificity
def flatten_rule(self, rule, href, index, is_user_agent_sheet=False):
results = []
sheet_index = 0 if is_user_agent_sheet else 1
if isinstance(rule, CSSStyleRule):
style = self.flatten_style(rule.style)
for selector in rule.selectorList:
specificity = (sheet_index,) + selector.specificity + (index,)
text = selector.selectorText
selector = list(selector.seq)
results.append((specificity, selector, style, text, href))
elif isinstance(rule, CSSPageRule):
style = self.flatten_style(rule.style)
self.page_rule.update(style)
elif isinstance(rule, CSSFontFaceRule):
if rule.style.length > 1:
# Ignore the meaningless font face rules generated by the
# benighted MS Word that contain only a font-family declaration
# and nothing else
self.font_face_rules.append(rule)
return results
def flatten_style(self, cssstyle):
style = {}
for prop in cssstyle:
name = prop.name
normalizer = normalizers.get(name, None)
if normalizer is not None:
style.update(normalizer(name, prop.cssValue))
elif name == 'text-align':
style['text-align'] = self._apply_text_align(prop.value)
else:
style[name] = prop.value
if 'font-size' in style:
size = style['font-size']
if size == 'normal':
size = 'medium'
if size == 'smallest':
size = 'xx-small'
if size in FONT_SIZE_NAMES:
style['font-size'] = "%.1frem" % (self.profile.fnames[size] / float(self.profile.fbase))
if '-epub-writing-mode' in style:
for x in ('-webkit-writing-mode', 'writing-mode'):
style[x] = style.get(x, style['-epub-writing-mode'])
return style
def _apply_text_align(self, text):
if text in ('left', 'justify') and self.opts.change_justification in ('left', 'justify'):
text = self.opts.change_justification
return text
def same_rules(self, opts, profile, stylesheets):
if self.opts != opts:
# it's unlikely to happen, but better safe than sorry
return False
if self.profile != profile:
return False
if len(self.stylesheets) != len(stylesheets):
return False
for index, stylesheet in enumerate(self.stylesheets):
if stylesheet != stylesheets[index]:
return False
return True
class Stylizer(object):
STYLESHEETS = WeakKeyDictionary()
@ -133,7 +217,6 @@ class Stylizer(object):
parser = CSSParser(fetcher=self._fetch_css_file,
log=logging.getLogger('calibre.css'))
self.font_face_rules = []
for elem in style_tags:
if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES and media_ok(elem.get('media'))):
text = elem.text if elem.text else u''
@ -200,29 +283,22 @@ class Stylizer(object):
self.logger.exception('Failed to parse %s, ignoring.'%w)
self.logger.debug('Bad css: ')
self.logger.debug(x)
rules = []
index = 0
self.stylesheets = set()
self.page_rule = {}
for sheet_index, stylesheet in enumerate(stylesheets):
href = stylesheet.href
self.stylesheets.add(href)
for rule in stylesheet.cssRules:
if rule.type == rule.MEDIA_RULE:
if media_ok(rule.media.mediaText):
for subrule in rule.cssRules:
rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0))
index += 1
else:
rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0))
index = index + 1
rules.sort(key=itemgetter(0)) # sort by specificity
self.rules = rules
# using oeb to store the rules, page rule and font face rules
# and generating them again if opts, profile or stylesheets are different
if (not hasattr(self.oeb, 'stylizer_rules')) \
or not self.oeb.stylizer_rules.same_rules(self.opts, self.profile, stylesheets):
self.oeb.stylizer_rules = StylizerRules(self.opts, self.profile, stylesheets)
self.rules = self.oeb.stylizer_rules.rules
self.page_rule = self.oeb.stylizer_rules.page_rule
self.font_face_rules = self.oeb.stylizer_rules.font_face_rules
self.flatten_style = self.oeb.stylizer_rules.flatten_style
self._styles = {}
pseudo_pat = re.compile(u':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I)
select = Select(tree, ignore_inappropriate_pseudo_classes=True)
for _, _, cssdict, text, _ in rules:
for _, _, cssdict, text, _ in self.rules:
fl = pseudo_pat.search(text)
try:
matches = tuple(select(text))
@ -301,56 +377,6 @@ class Stylizer(object):
data = data.encode('utf-8')
return ('utf-8', data)
def flatten_rule(self, rule, href, index, is_user_agent_sheet=False):
results = []
sheet_index = 0 if is_user_agent_sheet else 1
if isinstance(rule, CSSStyleRule):
style = self.flatten_style(rule.style)
for selector in rule.selectorList:
specificity = (sheet_index,) + selector.specificity + (index,)
text = selector.selectorText
selector = list(selector.seq)
results.append((specificity, selector, style, text, href))
elif isinstance(rule, CSSPageRule):
style = self.flatten_style(rule.style)
self.page_rule.update(style)
elif isinstance(rule, CSSFontFaceRule):
if rule.style.length > 1:
# Ignore the meaningless font face rules generated by the
# benighted MS Word that contain only a font-family declaration
# and nothing else
self.font_face_rules.append(rule)
return results
def flatten_style(self, cssstyle):
style = {}
for prop in cssstyle:
name = prop.name
normalizer = normalizers.get(name, None)
if normalizer is not None:
style.update(normalizer(name, prop.cssValue))
elif name == 'text-align':
style['text-align'] = self._apply_text_align(prop.value)
else:
style[name] = prop.value
if 'font-size' in style:
size = style['font-size']
if size == 'normal':
size = 'medium'
if size == 'smallest':
size = 'xx-small'
if size in FONT_SIZE_NAMES:
style['font-size'] = "%.1frem" % (self.profile.fnames[size] / float(self.profile.fbase))
if '-epub-writing-mode' in style:
for x in ('-webkit-writing-mode', 'writing-mode'):
style[x] = style.get(x, style['-epub-writing-mode'])
return style
def _apply_text_align(self, text):
if text in ('left', 'justify') and self.opts.change_justification in ('left', 'justify'):
text = self.opts.change_justification
return text
def style(self, element):
try:
return self._styles[element]