mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Input: Add support for DOCX files created by Word 2013 in "Strict" mode
This commit is contained in:
parent
cf2aa25944
commit
6d58813c65
@ -28,7 +28,15 @@ def simple_float(val, mult=1.0):
|
|||||||
try:
|
try:
|
||||||
return float(val) * mult
|
return float(val) * mult
|
||||||
except (ValueError, TypeError, AttributeError, KeyError):
|
except (ValueError, TypeError, AttributeError, KeyError):
|
||||||
return None
|
pass
|
||||||
|
|
||||||
|
def twips(val, mult=0.05):
    ''' Parse val as either a pure number representing twentieths of a point
    or a number followed by the suffix pt, representing pts.

    Returns the parsed value multiplied by mult (a float), or None when val
    cannot be parsed. '''
    try:
        return float(val) * mult
    except (ValueError, TypeError, AttributeError, KeyError):
        pass
    # The suffix is stripped only on the outermost call (default mult); the
    # recursive call uses mult=1.0, so it can never recurse a second time.
    if mult != 0.05:
        return None
    try:
        has_suffix = val.endswith('pt')
    except (AttributeError, TypeError):
        # val is not string-like (None, a number that failed above, a
        # list, ...): treat it as unparseable instead of raising.
        return None
    if not has_suffix:
        return None
    return twips(val[:-2], mult=1.0)
|
||||||
|
|
||||||
|
|
||||||
LINE_STYLES = { # {{{
|
LINE_STYLES = { # {{{
|
||||||
@ -160,6 +168,7 @@ def read_spacing(parent, dest, XPath, get):
|
|||||||
l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
|
l, lr = get(s, 'w:line'), get(s, 'w:lineRule', 'auto')
|
||||||
if l is not None:
|
if l is not None:
|
||||||
lh = simple_float(l, 0.05) if lr in {'exact', 'atLeast'} else simple_float(l, 1/240.0)
|
lh = simple_float(l, 0.05) if lr in {'exact', 'atLeast'} else simple_float(l, 1/240.0)
|
||||||
|
if lh is not None:
|
||||||
line_height = '%.3g%s' % (lh, 'pt' if lr in {'exact', 'atLeast'} else '')
|
line_height = '%.3g%s' % (lh, 'pt' if lr in {'exact', 'atLeast'} else '')
|
||||||
|
|
||||||
setattr(dest, 'margin_top', padding_top)
|
setattr(dest, 'margin_top', padding_top)
|
||||||
|
@ -30,6 +30,11 @@ TRANSITIONAL_NAMES = {
|
|||||||
'WEB_SETTINGS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings',
|
'WEB_SETTINGS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Names for documents saved in "Strict" mode: the same keys as
# TRANSITIONAL_NAMES, with the transitional officeDocument URI prefix in each
# value swapped for its purl.oclc.org counterpart.
# .items() is used instead of .iteritems() so the comprehension runs on both
# Python 2 and Python 3.
STRICT_NAMES = {
    k: v.replace('http://schemas.openxmlformats.org/officeDocument/2006', 'http://purl.oclc.org/ooxml/officeDocument')
    for k, v in TRANSITIONAL_NAMES.items()
}
|
||||||
|
|
||||||
TRANSITIONAL_NAMESPACES = {
|
TRANSITIONAL_NAMESPACES = {
|
||||||
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
||||||
'o': 'urn:schemas-microsoft-com:office:office',
|
'o': 'urn:schemas-microsoft-com:office:office',
|
||||||
@ -60,6 +65,14 @@ TRANSITIONAL_NAMESPACES = {
|
|||||||
'dcmitype': 'http://purl.org/dc/dcmitype/',
|
'dcmitype': 'http://purl.org/dc/dcmitype/',
|
||||||
'dcterms': 'http://purl.org/dc/terms/'
|
'dcterms': 'http://purl.org/dc/terms/'
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Namespace URIs for documents saved in "Strict" mode: the same prefixes as
# TRANSITIONAL_NAMESPACES, with the officeDocument, wordprocessingml and
# drawingml URI prefixes swapped for their purl.oclc.org counterparts. Values
# containing none of the three prefixes are copied unchanged.
# .items() is used instead of .iteritems() so the comprehension runs on both
# Python 2 and Python 3.
STRICT_NAMESPACES = {
    k: v.replace(
        'http://schemas.openxmlformats.org/officeDocument/2006', 'http://purl.oclc.org/ooxml/officeDocument').replace(
        'http://schemas.openxmlformats.org/wordprocessingml/2006', 'http://purl.oclc.org/ooxml/wordprocessingml').replace(
        'http://schemas.openxmlformats.org/drawingml/2006', 'http://purl.oclc.org/ooxml/drawingml')
    for k, v in TRANSITIONAL_NAMESPACES.items()
}
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def barename(x):
|
def barename(x):
|
||||||
@ -83,6 +96,9 @@ class DOCXNamespace(object):
|
|||||||
if transitional:
|
if transitional:
|
||||||
self.namespaces = TRANSITIONAL_NAMESPACES.copy()
|
self.namespaces = TRANSITIONAL_NAMESPACES.copy()
|
||||||
self.names = TRANSITIONAL_NAMES.copy()
|
self.names = TRANSITIONAL_NAMES.copy()
|
||||||
|
else:
|
||||||
|
self.namespaces = STRICT_NAMESPACES.copy()
|
||||||
|
self.names = STRICT_NAMES.copy()
|
||||||
|
|
||||||
def XPath(self, expr):
|
def XPath(self, expr):
|
||||||
ans = self.xpath_cache.get(expr, None)
|
ans = self.xpath_cache.get(expr, None)
|
||||||
|
@ -9,7 +9,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
|||||||
import textwrap
|
import textwrap
|
||||||
from collections import OrderedDict, Counter
|
from collections import OrderedDict, Counter
|
||||||
|
|
||||||
from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit
|
from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit, twips
|
||||||
from calibre.ebooks.docx.char_styles import RunStyle
|
from calibre.ebooks.docx.char_styles import RunStyle
|
||||||
from calibre.ebooks.docx.tables import TableStyle
|
from calibre.ebooks.docx.tables import TableStyle
|
||||||
|
|
||||||
@ -21,29 +21,21 @@ class PageProperties(object):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
def __init__(self, namespace, elems=()):
|
def __init__(self, namespace, elems=()):
|
||||||
self.width = self.height = 595.28, 841.89 # pts, A4
|
self.width, self.height = 595.28, 841.89 # pts, A4
|
||||||
self.margin_left = self.margin_right = 72 # pts
|
self.margin_left = self.margin_right = 72 # pts
|
||||||
|
|
||||||
|
def setval(attr, val):
|
||||||
|
val = twips(val)
|
||||||
|
if val is not None:
|
||||||
|
setattr(self, attr, val)
|
||||||
|
|
||||||
for sectPr in elems:
|
for sectPr in elems:
|
||||||
for pgSz in namespace.XPath('./w:pgSz')(sectPr):
|
for pgSz in namespace.XPath('./w:pgSz')(sectPr):
|
||||||
w, h = namespace.get(pgSz, 'w:w'), namespace.get(pgSz, 'w:h')
|
w, h = namespace.get(pgSz, 'w:w'), namespace.get(pgSz, 'w:h')
|
||||||
try:
|
setval('width', w), setval('height', h)
|
||||||
self.width = int(w)/20
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
self.height = int(h)/20
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
pass
|
|
||||||
for pgMar in namespace.XPath('./w:pgMar')(sectPr):
|
for pgMar in namespace.XPath('./w:pgMar')(sectPr):
|
||||||
l, r = namespace.get(pgMar, 'w:left'), namespace.get(pgMar, 'w:right')
|
l, r = namespace.get(pgMar, 'w:left'), namespace.get(pgMar, 'w:right')
|
||||||
try:
|
setval('margin_left', l), setval('margin_right', r)
|
||||||
self.margin_left = int(l)/20
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
pass
|
|
||||||
try:
|
|
||||||
self.margin_right = int(r)/20
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
class Style(object):
|
class Style(object):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user