ODT Input: More workarounds for the image positioning markup produced by newer versions of LibreOffice. Fixes #1063207 (odt to anything [alignment])

2025-08-11 09:13:57 -04:00 · 2012-10-08 13:36:39 +05:30 · 2012-10-08 13:36:39 +05:30 · 9b8c6f218e
commit 9b8c6f218e
parent 9e33851777
1 changed files with 66 additions and 12 deletions
--- a/src/calibre/ebooks/odt/input.py
+++ b/src/calibre/ebooks/odt/input.py
@ -6,15 +6,19 @@ __docformat__ = 'restructuredtext en'
 '''
 Convert an ODT file into a Open Ebook
 '''
-import os
+import os, logging
 from lxml import etree
 from cssutils import CSSParser
 from cssutils.css import CSSRule
 from odf.odf2xhtml import ODF2XHTML
 from odf.opendocument import load as odLoad
 from odf.draw import Frame as odFrame, Image as odImage
 from odf.namespaces import TEXTNS as odTEXTNS
 from calibre import CurrentDir, walk
 from calibre.ebooks.oeb.base import _css_logger
 class Extract(ODF2XHTML):
@ -29,14 +33,14 @@ class Extract(ODF2XHTML):
    def fix_markup(self, html, log):
        root = etree.fromstring(html)
        self.epubify_markup(root, log)
        self.filter_css(root, log)
-        self.extract_css(root)
+        self.extract_css(root, log)
        self.epubify_markup(root, log)
        html = etree.tostring(root, encoding='utf-8',
                xml_declaration=True)
        return html
-    def extract_css(self, root):
+    def extract_css(self, root, log):
        ans = []
        for s in root.xpath('//*[local-name() = "style" and @type="text/css"]'):
            ans.append(s.text)
@ -51,9 +55,21 @@ class Extract(ODF2XHTML):
            etree.SubElement(head, ns+'link', {'type':'text/css',
                'rel':'stylesheet', 'href':'odfpy.css'})
-        with open('odfpy.css', 'wb') as f:
+        css = u'\n\n'.join(ans)
-            f.write((u'\n\n'.join(ans)).encode('utf-8'))
+        parser = CSSParser(loglevel=logging.WARNING,
                            log=_css_logger)
        self.css = parser.parseString(css, validate=False)
        with open('odfpy.css', 'wb') as f:
            f.write(css.encode('utf-8'))
    def get_css_for_class(self, cls):
        if not cls: return None
        for rule in self.css.cssRules.rulesOfType(CSSRule.STYLE_RULE):
            for sel in rule.selectorList:
                q = sel.selectorText
                if q == '.' + cls:
                    return rule
    def epubify_markup(self, root, log):
        from calibre.ebooks.oeb.base import XPath, XHTML
@ -84,16 +100,54 @@ class Extract(ODF2XHTML):
                div.attrib['style'] = style
                img.attrib['style'] = 'max-width: 100%; max-height: 100%'
-        # A div/div/img construct causes text-align:center to not work in ADE
+        # Handle anchored images. The default markup + CSS produced by
-        # so set the display of the second div to inline. This should have no
+        # odf2xhtml works with WebKit but not with ADE. So we convert the
-        # effect (apart from minor vspace issues) in a compliant HTML renderer
+        # common cases of left/right/center aligned block images to work on
-        # but it fixes the centering of the image via a text-align:center on
+        # both webkit and ADE. We detect the case of setting the side margins
-        # the first div in ADE
+        # to auto and map it to an appropriate text-align directive, which
        # works in both WebKit and ADE.
        # https://bugs.launchpad.net/bugs/1063207
        # https://bugs.launchpad.net/calibre/+bug/859343
        imgpath = XPath('descendant::h:div/h:div/h:img')
        for img in imgpath(root):
            div2 = img.getparent()
            div1 = div2.getparent()
-            if len(div1) == len(div2) == 1:
+            if (len(div1), len(div2)) != (1, 1): continue
            cls = div1.get('class', '')
            first_rules = filter(None, [self.get_css_for_class(x) for x in
                cls.split()])
            has_align = False
            for r in first_rules:
                if r.style.getProperty(u'text-align') is not None:
                    has_align = True
            ml = mr = None
            if not has_align:
                aval = None
                cls = div2.get(u'class', u'')
                rules = filter(None, [self.get_css_for_class(x) for x in
                    cls.split()])
                for r in rules:
                    ml = r.style.getPropertyCSSValue(u'margin-left') or ml
                    mr = r.style.getPropertyCSSValue(u'margin-right') or mr
                    ml = getattr(ml, 'value', None)
                    mr = getattr(mr, 'value', None)
                if ml == mr == u'auto':
                    aval = u'center'
                elif ml == u'auto' and mr != u'auto':
                    aval = 'right'
                elif ml != u'auto' and mr == u'auto':
                    aval = 'left'
                if aval is not None:
                    style = div1.attrib.get('style', '').strip()
                    if style and not style.endswith(';'):
                        style = style + ';'
                    style += 'text-align:%s'%aval
                    has_align = True
                    div1.attrib['style'] = style
            if has_align:
                # This is needed for ADE, without it the text-align has no
                # effect
                style = div2.attrib['style']
                div2.attrib['style'] = 'display:inline;'+style