mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
ODT Input: More workarounds for the image positioning markup produced by newer versions of LibreOffice. Fixes #1063207 (odt to anything [alignment])
This commit is contained in:
parent
9e33851777
commit
9b8c6f218e
@ -6,15 +6,19 @@ __docformat__ = 'restructuredtext en'
|
|||||||
'''
|
'''
|
||||||
Convert an ODT file into an Open Ebook
|
Convert an ODT file into an Open Ebook
|
||||||
'''
|
'''
|
||||||
import os
|
import os, logging
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
from cssutils import CSSParser
|
||||||
|
from cssutils.css import CSSRule
|
||||||
|
|
||||||
from odf.odf2xhtml import ODF2XHTML
|
from odf.odf2xhtml import ODF2XHTML
|
||||||
from odf.opendocument import load as odLoad
|
from odf.opendocument import load as odLoad
|
||||||
from odf.draw import Frame as odFrame, Image as odImage
|
from odf.draw import Frame as odFrame, Image as odImage
|
||||||
from odf.namespaces import TEXTNS as odTEXTNS
|
from odf.namespaces import TEXTNS as odTEXTNS
|
||||||
|
|
||||||
from calibre import CurrentDir, walk
|
from calibre import CurrentDir, walk
|
||||||
|
from calibre.ebooks.oeb.base import _css_logger
|
||||||
|
|
||||||
class Extract(ODF2XHTML):
|
class Extract(ODF2XHTML):
|
||||||
|
|
||||||
@ -29,14 +33,14 @@ class Extract(ODF2XHTML):
|
|||||||
|
|
||||||
def fix_markup(self, html, log):
|
def fix_markup(self, html, log):
|
||||||
root = etree.fromstring(html)
|
root = etree.fromstring(html)
|
||||||
self.epubify_markup(root, log)
|
|
||||||
self.filter_css(root, log)
|
self.filter_css(root, log)
|
||||||
self.extract_css(root)
|
self.extract_css(root, log)
|
||||||
|
self.epubify_markup(root, log)
|
||||||
html = etree.tostring(root, encoding='utf-8',
|
html = etree.tostring(root, encoding='utf-8',
|
||||||
xml_declaration=True)
|
xml_declaration=True)
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def extract_css(self, root):
|
def extract_css(self, root, log):
|
||||||
ans = []
|
ans = []
|
||||||
for s in root.xpath('//*[local-name() = "style" and @type="text/css"]'):
|
for s in root.xpath('//*[local-name() = "style" and @type="text/css"]'):
|
||||||
ans.append(s.text)
|
ans.append(s.text)
|
||||||
@ -51,9 +55,21 @@ class Extract(ODF2XHTML):
|
|||||||
etree.SubElement(head, ns+'link', {'type':'text/css',
|
etree.SubElement(head, ns+'link', {'type':'text/css',
|
||||||
'rel':'stylesheet', 'href':'odfpy.css'})
|
'rel':'stylesheet', 'href':'odfpy.css'})
|
||||||
|
|
||||||
with open('odfpy.css', 'wb') as f:
|
css = u'\n\n'.join(ans)
|
||||||
f.write((u'\n\n'.join(ans)).encode('utf-8'))
|
parser = CSSParser(loglevel=logging.WARNING,
|
||||||
|
log=_css_logger)
|
||||||
|
self.css = parser.parseString(css, validate=False)
|
||||||
|
|
||||||
|
with open('odfpy.css', 'wb') as f:
|
||||||
|
f.write(css.encode('utf-8'))
|
||||||
|
|
||||||
|
def get_css_for_class(self, cls):
    '''
    Look up the style rule for a single CSS class name.

    Searches the style rules of the parsed stylesheet held in self.css and
    returns the first rule that has a selector whose text is exactly
    ".<cls>". Returns None when cls is empty or when no rule matches.
    '''
    if cls:
        wanted = '.' + cls
        style_rules = self.css.cssRules.rulesOfType(CSSRule.STYLE_RULE)
        for candidate in style_rules:
            # Only an exact ".classname" selector counts as a match
            if any(s.selectorText == wanted for s in candidate.selectorList):
                return candidate
    return None
|
||||||
|
|
||||||
def epubify_markup(self, root, log):
|
def epubify_markup(self, root, log):
|
||||||
from calibre.ebooks.oeb.base import XPath, XHTML
|
from calibre.ebooks.oeb.base import XPath, XHTML
|
||||||
@ -84,16 +100,54 @@ class Extract(ODF2XHTML):
|
|||||||
div.attrib['style'] = style
|
div.attrib['style'] = style
|
||||||
img.attrib['style'] = 'max-width: 100%; max-height: 100%'
|
img.attrib['style'] = 'max-width: 100%; max-height: 100%'
|
||||||
|
|
||||||
# A div/div/img construct causes text-align:center to not work in ADE
|
# Handle anchored images. The default markup + CSS produced by
|
||||||
# so set the display of the second div to inline. This should have no
|
# odf2xhtml works with WebKit but not with ADE. So we convert the
|
||||||
# effect (apart from minor vspace issues) in a compliant HTML renderer
|
# common cases of left/right/center aligned block images to work on
|
||||||
# but it fixes the centering of the image via a text-align:center on
|
# both webkit and ADE. We detect the case of setting the side margins
|
||||||
# the first div in ADE
|
# to auto and map it to an appropriate text-align directive, which
|
||||||
|
# works in both WebKit and ADE.
|
||||||
|
# https://bugs.launchpad.net/bugs/1063207
|
||||||
|
# https://bugs.launchpad.net/calibre/+bug/859343
|
||||||
imgpath = XPath('descendant::h:div/h:div/h:img')
|
imgpath = XPath('descendant::h:div/h:div/h:img')
|
||||||
for img in imgpath(root):
|
for img in imgpath(root):
|
||||||
div2 = img.getparent()
|
div2 = img.getparent()
|
||||||
div1 = div2.getparent()
|
div1 = div2.getparent()
|
||||||
if len(div1) == len(div2) == 1:
|
if (len(div1), len(div2)) != (1, 1): continue
|
||||||
|
cls = div1.get('class', '')
|
||||||
|
first_rules = filter(None, [self.get_css_for_class(x) for x in
|
||||||
|
cls.split()])
|
||||||
|
has_align = False
|
||||||
|
for r in first_rules:
|
||||||
|
if r.style.getProperty(u'text-align') is not None:
|
||||||
|
has_align = True
|
||||||
|
ml = mr = None
|
||||||
|
if not has_align:
|
||||||
|
aval = None
|
||||||
|
cls = div2.get(u'class', u'')
|
||||||
|
rules = filter(None, [self.get_css_for_class(x) for x in
|
||||||
|
cls.split()])
|
||||||
|
for r in rules:
|
||||||
|
ml = r.style.getPropertyCSSValue(u'margin-left') or ml
|
||||||
|
mr = r.style.getPropertyCSSValue(u'margin-right') or mr
|
||||||
|
ml = getattr(ml, 'value', None)
|
||||||
|
mr = getattr(mr, 'value', None)
|
||||||
|
if ml == mr == u'auto':
|
||||||
|
aval = u'center'
|
||||||
|
elif ml == u'auto' and mr != u'auto':
|
||||||
|
aval = 'right'
|
||||||
|
elif ml != u'auto' and mr == u'auto':
|
||||||
|
aval = 'left'
|
||||||
|
if aval is not None:
|
||||||
|
style = div1.attrib.get('style', '').strip()
|
||||||
|
if style and not style.endswith(';'):
|
||||||
|
style = style + ';'
|
||||||
|
style += 'text-align:%s'%aval
|
||||||
|
has_align = True
|
||||||
|
div1.attrib['style'] = style
|
||||||
|
|
||||||
|
if has_align:
|
||||||
|
# This is needed for ADE, without it the text-align has no
|
||||||
|
# effect
|
||||||
style = div2.attrib['style']
|
style = div2.attrib['style']
|
||||||
div2.attrib['style'] = 'display:inline;'+style
|
div2.attrib['style'] = 'display:inline;'+style
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user