diff --git a/src/calibre/ebooks/odt/input.py b/src/calibre/ebooks/odt/input.py
index 1184148e80..10553dac2b 100644
--- a/src/calibre/ebooks/odt/input.py
+++ b/src/calibre/ebooks/odt/input.py
@@ -7,6 +7,8 @@ __docformat__ = 'restructuredtext en'
 Convert an ODT file into a Open Ebook
 '''
 import os
+
+from lxml import etree
 from odf.odf2xhtml import ODF2XHTML
 
 from calibre import CurrentDir, walk
@@ -23,7 +25,76 @@ class Extract(ODF2XHTML):
         with open(name, 'wb') as f:
             f.write(data)
 
-    def __call__(self, stream, odir):
+    def filter_css(self, html, log):
+        '''
+        Collapse multi-class rules in the embedded stylesheet of ``html``.
+
+        The text of the first ``<style type="text/css">`` element is
+        rewritten by :meth:`do_filter_css` and every element whose
+        ``class`` attribute names a merged class also receives the
+        generated replacement classes.
+
+        :param html: The XHTML document produced by ``odf2xhtml``.
+        :param log: Conversion log (currently unused).
+        :return: ``html`` itself when there is no stylesheet text,
+                 otherwise the re-serialized document as unicode text, so
+                 that the caller's single ``encode('utf-8')`` stays valid.
+        '''
+        root = etree.fromstring(html)
+        style = root.xpath('//*[local-name() = "style" and @type="text/css"]')
+        css = style[0].text if style else None
+        if not css:
+            return html
+        style[0].text, sel_map = self.do_filter_css(css)
+        for x in root.xpath('//*[@class]'):
+            orig = x.get('class')
+            extra = []
+            for cls in orig.split():
+                extra.extend(sel_map.get(cls, []))
+            if extra:
+                x.set('class', orig + ' ' + ' '.join(extra))
+        raw = etree.tostring(root, encoding='utf-8', xml_declaration=True)
+        return raw.decode('utf-8')
+
+    def do_filter_css(self, css):
+        '''
+        Replace each rule that lists several plain class selectors with a
+        rule for one generated class (``c_odt0``, ``c_odt1``, ...).
+
+        :param css: Stylesheet text.
+        :return: ``(css_text, sel_map)``; ``sel_map`` maps an original
+                 class name (without the leading dot) to the list of
+                 generated class names that replace its shared rules.
+        '''
+        import re
+        from cssutils import parseString
+        from cssutils.css import CSSRule
+        # A leading dot alone does not make a selector a plain class:
+        # ".a.b" or ".a:hover" cannot be emulated by adding a class name
+        # to an element, so rules containing them are left untouched.
+        plain_class = re.compile(r'\.[\w-]+$', re.UNICODE)
+        sheet = parseString(css)
+        rules = list(sheet.cssRules.rulesOfType(CSSRule.STYLE_RULE))
+        sel_map = {}
+        count = 0
+        for r in rules:
+            selectors = [x.selectorText for x in r.selectorList]
+            if len(selectors) < 2:
+                continue
+            if not all(plain_class.match(s) for s in selectors):
+                continue
+            replace_name = 'c_odt%d'%count
+            count += 1
+            for s in selectors:
+                sel_map.setdefault(s[1:], []).append(replace_name)
+            r.selectorText = '.'+replace_name
+        css_text = sheet.cssText
+        if isinstance(css_text, bytes):
+            # lxml only accepts unicode or pure ASCII for element text
+            css_text = css_text.decode('utf-8')
+        return css_text, sel_map
+
+    def __call__(self, stream, odir, log):
         from calibre.utils.zipfile import ZipFile
         from calibre.ebooks.metadata.meta import get_metadata
         from calibre.ebooks.metadata.opf2 import OPFCreator
@@ -32,13 +103,17 @@ class Extract(ODF2XHTML):
         if not os.path.exists(odir):
             os.makedirs(odir)
         with CurrentDir(odir):
-            print 'Extracting ODT file...'
+            log('Extracting ODT file...')
             html = self.odf2xhtml(stream)
             # A blanket img specification like this causes problems
-            # with EPUB output as the contaiing element often has
+            # with EPUB output as the containing element often has
             # an absolute height and width set that is larger than
             # the available screen real estate
             html = html.replace('img { width: 100%; height: 100%; }', '')
+            try:
+                html = self.filter_css(html, log)
+            except Exception:
+                log.exception('Failed to filter CSS, conversion may be slow')
             with open('index.xhtml', 'wb') as f:
                 f.write(html.encode('utf-8'))
             zf = ZipFile(stream, 'r')
@@ -67,7 +142,7 @@ class ODTInput(InputFormatPlugin):
 
     def convert(self, stream, options, file_ext, log,
                 accelerators):
-        return Extract()(stream, '.')
+        return Extract()(stream, '.', log)
 
     def postprocess_book(self, oeb, opts, log):
         # Fix

constructs as the asinine epubchecker complains
diff --git a/src/odf/odf2xhtml.py b/src/odf/odf2xhtml.py
index 26da9d9905..a04aa48bf7 100644
--- a/src/odf/odf2xhtml.py
+++ b/src/odf/odf2xhtml.py
@@ -841,11 +841,24 @@ ol, ul { padding-left: 2em; }
             self.styledict[name] = styles
         # Write the styles to HTML
         self.writeout(self.default_styles)
+        # Changed by Kovid to not write out endless copies of the same style
+        # Identical declaration blocks are grouped under one selector list.
+        # The items are sorted so that equal style sets always produce the
+        # same key, and the keys are remembered in first-seen order so that
+        # the rules are written in stylestack order.
+        css_styles = {}
+        css_order = []
         for name in self.stylestack:
             styles = self.styledict.get(name)
-            css2 = self.cs.convert_styles(styles)
-            self.writeout("%s {\n" % name)
-            for style, val in css2.items():
+            css2 = tuple(sorted(self.cs.convert_styles(styles).iteritems()))
+            if css2 not in css_styles:
+                css_styles[css2] = []
+                css_order.append(css2)
+            css_styles[css2].append(name)
+
+        for css2 in css_order:
+            self.writeout("%s {\n" % ', '.join(css_styles[css2]))
+            for style, val in css2:
                 self.writeout("\t%s: %s;\n" % (style, val) )
             self.writeout("}\n")
