From 9ea57154066bb139d288252ba22a92d66edde35d Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 11 Sep 2012 12:49:32 +0530 Subject: [PATCH] Conversion: Add support for CSS pseudo classes :hover, :link, :visited, :first-line, :focus, :active, :first-letter --- src/calibre/ebooks/conversion/plumber.py | 2 + src/calibre/ebooks/oeb/stylizer.py | 73 +++++++++++++------- src/calibre/ebooks/oeb/transforms/flatcss.py | 70 ++++++++++++++----- 3 files changed, 104 insertions(+), 41 deletions(-) diff --git a/src/calibre/ebooks/conversion/plumber.py b/src/calibre/ebooks/conversion/plumber.py index 8f7ab10e0e..60cce24121 100644 --- a/src/calibre/ebooks/conversion/plumber.py +++ b/src/calibre/ebooks/conversion/plumber.py @@ -1009,6 +1009,8 @@ OptionRecommendation(name='search_replace', pr(0., _('Running transforms on ebook...')) + self.oeb.plumber_output_format = self.output_fmt or '' + from calibre.ebooks.oeb.transforms.guide import Clean Clean()(self.oeb, self.opts) pr(0.1) diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index 969f7c763a..d558f7f49b 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -268,33 +268,41 @@ class Stylizer(object): self.rules = rules self._styles = {} for _, _, cssdict, text, _ in rules: - fl = ':first-letter' in text - if fl: - text = text.replace(':first-letter', '') + fl = re.search(ur':(first-letter|first-line|link|hover|visited|active|focus)', text) + if fl is not None: + text = text.replace(fl.group(), '') selector = get_css_selector(text) matches = selector(tree, self.logger) - if fl: - from lxml.builder import ElementMaker - E = ElementMaker(namespace=XHTML_NS) - for elem in matches: - for x in elem.iter(): - if x.text: - punctuation_chars = [] - text = unicode(x.text) - while text: - if not unicodedata.category(text[0]).startswith('P'): - break - punctuation_chars.append(text[0]) - text = text[1:] + if fl is not None: + fl = fl.group(1) + if fl == 'first-letter' and getattr(self.oeb, + 'plumber_output_format', '').lower() == u'mobi': + # Fake first-letter + from lxml.builder import ElementMaker + E = ElementMaker(namespace=XHTML_NS) + for elem in matches: + for x in elem.iter(): + if x.text: + punctuation_chars = [] + text = unicode(x.text) + while text: + category = unicodedata.category(text[0]) + if category[0] not in {'P', 'Z'}: + break + punctuation_chars.append(text[0]) + text = text[1:] - special_text = u''.join(punctuation_chars) + \ - (text[0] if text else u'') - span = E.span(special_text) - span.tail = text[1:] - x.text = None - x.insert(0, span) - self.style(span)._update_cssdict(cssdict) - break + special_text = u''.join(punctuation_chars) + \ + (text[0] if text else u'') + span = E.span(special_text) + span.tail = text[1:] + x.text = None + x.insert(0, span) + self.style(span)._update_cssdict(cssdict) + break + else: # Element pseudo-class + for elem in matches: + self.style(elem)._update_pseudo_class(fl, cssdict) else: for elem in matches: self.style(elem)._update_cssdict(cssdict) @@ -495,6 +503,7 @@ class Style(object): self._height = None self._lineHeight = None self._bgcolor = None + self._pseudo_classes = {} stylizer._styles[element] = self def set(self, prop, val): @@ -506,6 +515,11 @@ class Style(object): def _update_cssdict(self, cssdict): self._style.update(cssdict) + def _update_pseudo_class(self, name, cssdict): + orig = self._pseudo_classes.get(name, {}) + orig.update(cssdict) + self._pseudo_classes[name] = orig + def _apply_style_attr(self, url_replacer=None): attrib = self._element.attrib if 'style' not in attrib: @@ -778,3 +792,14 @@ class Style(object): def cssdict(self): return dict(self._style) + + def pseudo_classes(self, filter_css): + if filter_css: + css = copy.deepcopy(self._pseudo_classes) + for psel, cssdict in css.iteritems(): + for k in filter_css: + cssdict.pop(k, None) + else: + css = self._pseudo_classes + return {k:v for k, v in css.iteritems() if v} + diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index 72c9dc0d72..6633651a82 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -222,7 +222,7 @@ class CSSFlattener(object): value = 0.0 cssdict[property] = "%0.5fem" % (value / fsize) - def flatten_node(self, node, stylizer, names, styles, psize, item_id): + def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id): if not isinstance(node.tag, basestring) \ or namespace(node.tag) != XHTML_NS: return @@ -357,25 +357,51 @@ class CSSFlattener(object): cssdict.get('text-align', None) not in ('center', 'right')): cssdict['text-indent'] = "%1.1fem" % indent_size - if cssdict: - items = cssdict.items() - items.sort() - css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items) - classes = node.get('class', '').strip() or 'calibre' - klass = STRIPNUM.sub('', classes.split()[0].replace('_', '')) - if css in styles: - match = styles[css] - else: - match = klass + str(names[klass] or '') - styles[css] = match - names[klass] += 1 - node.attrib['class'] = match + pseudo_classes = style.pseudo_classes(self.filter_css) + if cssdict or pseudo_classes: + keep_classes = set() + + if cssdict: + items = cssdict.items() + items.sort() + css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items) + classes = node.get('class', '').strip() or 'calibre' + klass = STRIPNUM.sub('', classes.split()[0].replace('_', '')) + if css in styles: + match = styles[css] + else: + match = klass + str(names[klass] or '') + styles[css] = match + names[klass] += 1 + node.attrib['class'] = match + keep_classes.add(match) + + for psel, cssdict in pseudo_classes.iteritems(): + items = sorted(cssdict.iteritems()) + css = u';\n'.join(u'%s: %s' % (key, val) for key, val in items) + pstyles = pseudo_styles[psel] + if css in pstyles: + match = pstyles[css] + else: + # We have to use a different class for each psel as + # otherwise you can have incorrect styles for a situation + # like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green } + # If the pcalibre class for a:hover and a:link is the same, + # then the class attribute for a.x tags will contain both + # that class and the class for a.x:hover, which is wrong. + klass = 'pcalibre' + match = klass + str(names[klass] or '') + pstyles[css] = match + names[klass] += 1 + keep_classes.add(match) + node.attrib['class'] = ' '.join(keep_classes) + elif 'class' in node.attrib: del node.attrib['class'] if 'style' in node.attrib: del node.attrib['style'] for child in node: - self.flatten_node(child, stylizer, names, styles, psize, item_id) + self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id) def flatten_head(self, item, href, global_href): html = item.data @@ -446,7 +472,7 @@ class CSSFlattener(object): def flatten_spine(self): names = defaultdict(int) - styles = {} + styles, pseudo_styles = {}, defaultdict(dict) for item in self.oeb.spine: html = item.data stylizer = self.stylizers[item] @@ -454,10 +480,20 @@ class CSSFlattener(object): self.specializer(item, stylizer) body = html.find(XHTML('body')) fsize = self.context.dest.fbase - self.flatten_node(body, stylizer, names, styles, fsize, item.id) + self.flatten_node(body, stylizer, names, styles, pseudo_styles, fsize, item.id) items = [(key, val) for (val, key) in styles.items()] items.sort() + # :hover must come after link and :active must come after :hover + psels = sorted(pseudo_styles.iterkeys(), key=lambda x : + {'hover':1, 'active':2}.get(x, 0)) + for psel in psels: + styles = pseudo_styles[psel] + if not styles: continue + x = sorted(((k+':'+psel, v) for v, k in styles.iteritems())) + items.extend(x) + css = ''.join(".%s {\n%s;\n}\n\n" % (key, val) for key, val in items) + href = self.replace_css(css) global_css = self.collect_global_css() for item in self.oeb.spine: