diff --git a/src/calibre/ebooks/docx/block_styles.py b/src/calibre/ebooks/docx/block_styles.py index 09bdae2e22..103fb6c30f 100644 --- a/src/calibre/ebooks/docx/block_styles.py +++ b/src/calibre/ebooks/docx/block_styles.py @@ -7,13 +7,12 @@ __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' from collections import OrderedDict -from calibre.ebooks.docx.names import XPath, get class Inherit: pass inherit = Inherit() -def binary_property(parent, name): +def binary_property(parent, name, XPath, get): vals = XPath('./w:%s' % name)(parent) if not vals: return inherit @@ -68,7 +67,7 @@ LINE_STYLES = { # {{{ border_props = ('padding_%s', 'border_%s_width', 'border_%s_style', 'border_%s_color') -def read_single_border(parent, edge): +def read_single_border(parent, edge, XPath, get): color = style = width = padding = None for elem in XPath('./w:%s' % edge)(parent): c = get(elem, 'w:color') @@ -95,19 +94,19 @@ def read_single_border(parent, edge): width = 3 # WebKit needs 3pts to render double borders return {p:v for p, v in zip(border_props, (padding, width, style, color))} -def read_border(parent, dest, border_edges=('left', 'top', 'right', 'bottom'), name='pBdr'): +def read_border(parent, dest, XPath, get, border_edges=('left', 'top', 'right', 'bottom'), name='pBdr'): vals = {k % edge:inherit for edge in border_edges for k in border_props} for border in XPath('./w:' + name)(parent): for edge in border_edges: - for prop, val in read_single_border(border, edge).iteritems(): + for prop, val in read_single_border(border, edge, XPath, get).iteritems(): if val is not None: vals[prop % edge] = val for key, val in vals.iteritems(): setattr(dest, key, val) -def read_indent(parent, dest): +def read_indent(parent, dest, XPath, get): padding_left = padding_right = text_indent = inherit for indent in XPath('./w:ind')(parent): l, lc = get(indent, 'w:left'), get(indent, 'w:leftChars') @@ -133,7 +132,7 @@ def read_indent(parent, dest): setattr(dest, 'margin_right', padding_right) 
setattr(dest, 'text_indent', text_indent) -def read_justification(parent, dest): +def read_justification(parent, dest, XPath, get): ans = inherit for jc in XPath('./w:jc[@w:val]')(parent): val = get(jc, 'w:val') @@ -145,7 +144,7 @@ def read_justification(parent, dest): ans = val setattr(dest, 'text_align', ans) -def read_spacing(parent, dest): +def read_spacing(parent, dest, XPath, get): padding_top = padding_bottom = line_height = inherit for s in XPath('./w:spacing')(parent): a, al, aa = get(s, 'w:after'), get(s, 'w:afterLines'), get(s, 'w:afterAutospacing') @@ -167,7 +166,7 @@ def read_spacing(parent, dest): setattr(dest, 'margin_bottom', padding_bottom) setattr(dest, 'line_height', line_height) -def read_direction(parent, dest): +def read_direction(parent, dest, XPath, get): ans = inherit for jc in XPath('./w:textFlow[@w:val]')(parent): val = get(jc, 'w:val') @@ -177,7 +176,7 @@ def read_direction(parent, dest): ans = 'rtl' setattr(dest, 'direction', ans) -def read_shd(parent, dest): +def read_shd(parent, dest, XPath, get): ans = inherit for shd in XPath('./w:shd[@w:fill]')(parent): val = get(shd, 'w:fill') @@ -185,7 +184,7 @@ def read_shd(parent, dest): ans = simple_color(val, auto='transparent') setattr(dest, 'background_color', ans) -def read_numbering(parent, dest): +def read_numbering(parent, dest, XPath, get): lvl = num_id = None for np in XPath('./w:numPr')(parent): for ilvl in XPath('./w:ilvl[@w:val]')(np): @@ -203,7 +202,7 @@ class Frame(object): all_attributes = ('drop_cap', 'h', 'w', 'h_anchor', 'h_rule', 'v_anchor', 'wrap', 'h_space', 'v_space', 'lines', 'x_align', 'y_align', 'x', 'y') - def __init__(self, fp): + def __init__(self, fp, XPath, get): self.drop_cap = get(fp, 'w:dropCap', 'none') try: self.h = int(get(fp, 'w:h'))/20 @@ -275,10 +274,10 @@ class Frame(object): def __ne__(self, other): return not self.__eq__(other) -def read_frame(parent, dest): +def read_frame(parent, dest, XPath, get): ans = inherit for fp in 
XPath('./w:framePr')(parent): - ans = Frame(fp) + ans = Frame(fp, XPath, get) setattr(dest, 'frame', ans) # }}} @@ -303,7 +302,8 @@ class ParagraphStyle(object): 'numbering', 'font_family', 'font_size', 'color', 'frame', ) - def __init__(self, pPr=None): + def __init__(self, namespace, pPr=None): + self.namespace = namespace self.linked_style = None if pPr is None: for p in self.all_properties: @@ -315,14 +315,14 @@ class ParagraphStyle(object): 'pageBreakBefore', 'snapToGrid', 'suppressLineNumbers', 'suppressOverlap', 'topLinePunct', 'widowControl', 'wordWrap', ): - setattr(self, p, binary_property(pPr, p)) + setattr(self, p, binary_property(pPr, p, namespace.XPath, namespace.get)) for x in ('border', 'indent', 'justification', 'spacing', 'direction', 'shd', 'numbering', 'frame'): f = globals()['read_%s' % x] - f(pPr, self) + f(pPr, self, namespace.XPath, namespace.get) - for s in XPath('./w:pStyle[@w:val]')(pPr): - self.linked_style = get(s, 'w:val') + for s in namespace.XPath('./w:pStyle[@w:val]')(pPr): + self.linked_style = namespace.get(s, 'w:val') self.font_family = self.font_size = self.color = inherit diff --git a/src/calibre/ebooks/docx/char_styles.py b/src/calibre/ebooks/docx/char_styles.py index 01b23a500a..fd1deffc5c 100644 --- a/src/calibre/ebooks/docx/char_styles.py +++ b/src/calibre/ebooks/docx/char_styles.py @@ -9,10 +9,9 @@ __copyright__ = '2013, Kovid Goyal ' from collections import OrderedDict from calibre.ebooks.docx.block_styles import ( # noqa inherit, simple_color, LINE_STYLES, simple_float, binary_property, read_shd) -from calibre.ebooks.docx.names import XPath, get # Read from XML {{{ -def read_text_border(parent, dest): +def read_text_border(parent, dest, XPath, get): border_color = border_style = border_width = padding = inherit elems = XPath('./w:bdr')(parent) if elems and elems[0].attrib: @@ -46,7 +45,7 @@ def read_text_border(parent, dest): setattr(dest, 'border_width', border_width) setattr(dest, 'padding', padding) -def 
read_color(parent, dest): +def read_color(parent, dest, XPath, get): ans = inherit for col in XPath('./w:color[@w:val]')(parent): val = get(col, 'w:val') @@ -61,7 +60,7 @@ def convert_highlight_color(val): 'darkGreen': '#008000', 'darkMagenta': '#800080', 'darkRed': '#800000', 'darkYellow': '#808000', 'lightGray': '#c0c0c0'}.get(val, val) -def read_highlight(parent, dest): +def read_highlight(parent, dest, XPath, get): ans = inherit for col in XPath('./w:highlight[@w:val]')(parent): val = get(col, 'w:val') @@ -74,7 +73,7 @@ def read_highlight(parent, dest): ans = val setattr(dest, 'highlight', ans) -def read_lang(parent, dest): +def read_lang(parent, dest, XPath, get): ans = inherit for col in XPath('./w:lang[@w:val]')(parent): val = get(col, 'w:val') @@ -91,7 +90,7 @@ def read_lang(parent, dest): ans = val setattr(dest, 'lang', ans) -def read_letter_spacing(parent, dest): +def read_letter_spacing(parent, dest, XPath, get): ans = inherit for col in XPath('./w:spacing[@w:val]')(parent): val = simple_float(get(col, 'w:val'), 0.05) @@ -99,7 +98,7 @@ def read_letter_spacing(parent, dest): ans = val setattr(dest, 'letter_spacing', ans) -def read_sz(parent, dest): +def read_sz(parent, dest, XPath, get): ans = inherit for col in XPath('./w:sz[@w:val]')(parent): val = simple_float(get(col, 'w:val'), 0.5) @@ -107,7 +106,7 @@ def read_sz(parent, dest): ans = val setattr(dest, 'font_size', ans) -def read_underline(parent, dest): +def read_underline(parent, dest, XPath, get): ans = inherit for col in XPath('./w:u[@w:val]')(parent): val = get(col, 'w:val') @@ -115,7 +114,7 @@ def read_underline(parent, dest): ans = val if val == 'none' else 'underline' setattr(dest, 'text_decoration', ans) -def read_vert_align(parent, dest): +def read_vert_align(parent, dest, XPath, get): ans = inherit for col in XPath('./w:vertAlign[@w:val]')(parent): val = get(col, 'w:val') @@ -123,7 +122,7 @@ def read_vert_align(parent, dest): ans = val setattr(dest, 'vert_align', ans) -def 
read_font_family(parent, dest): +def read_font_family(parent, dest, XPath, get): ans = inherit for col in XPath('./w:rFonts')(parent): val = get(col, 'w:asciiTheme') @@ -150,7 +149,8 @@ class RunStyle(object): 'b', 'bCs', 'caps', 'emboss', 'i', 'iCs', 'imprint', 'shadow', 'smallCaps', 'strike', 'dstrike', 'vanish', } - def __init__(self, rPr=None): + def __init__(self, namespace, rPr=None): + self.namespace = namespace self.linked_style = None if rPr is None: for p in self.all_properties: @@ -160,14 +160,14 @@ class RunStyle(object): 'b', 'bCs', 'caps', 'cs', 'dstrike', 'emboss', 'i', 'iCs', 'imprint', 'rtl', 'shadow', 'smallCaps', 'strike', 'vanish', 'webHidden', ): - setattr(self, p, binary_property(rPr, p)) + setattr(self, p, binary_property(rPr, p, namespace.XPath, namespace.get)) for x in ('text_border', 'color', 'highlight', 'shd', 'letter_spacing', 'sz', 'underline', 'vert_align', 'lang', 'font_family'): f = globals()['read_%s' % x] - f(rPr, self) + f(rPr, self, namespace.XPath, namespace.get) - for s in XPath('./w:rStyle[@w:val]')(rPr): - self.linked_style = get(s, 'w:val') + for s in namespace.XPath('./w:rStyle[@w:val]')(rPr): + self.linked_style = namespace.get(s, 'w:val') self._css = None diff --git a/src/calibre/ebooks/docx/cleanup.py b/src/calibre/ebooks/docx/cleanup.py index 88c0076797..04cee33268 100644 --- a/src/calibre/ebooks/docx/cleanup.py +++ b/src/calibre/ebooks/docx/cleanup.py @@ -8,7 +8,6 @@ __copyright__ = '2013, Kovid Goyal ' import os -from calibre.ebooks.docx.names import XPath NBSP = '\xa0' def mergeable(previous, current): @@ -99,7 +98,7 @@ def before_count(root, tag, limit=10): if ans > limit: return limit -def cleanup_markup(log, root, styles, dest_dir, detect_cover): +def cleanup_markup(log, root, styles, dest_dir, detect_cover, XPath): # Move <hr>s outside paragraphs, if possible. pancestor = XPath('|'.join('ancestor::%s[1]' % x for x in ('p', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) for hr in root.xpath('//span/hr'): diff --git a/src/calibre/ebooks/docx/container.py b/src/calibre/ebooks/docx/container.py index cb3201e4f7..024c55a0a7 100644 --- a/src/calibre/ebooks/docx/container.py +++ b/src/calibre/ebooks/docx/container.py @@ -14,7 +14,7 @@ from calibre import walk, guess_type from calibre.ebooks.metadata import string_to_authors, authors_to_sort_string from calibre.ebooks.metadata.book.base import Metadata from calibre.ebooks.docx import InvalidDOCX -from calibre.ebooks.docx.names import DOCUMENT, DOCPROPS, XPath, APPPROPS +from calibre.ebooks.docx.names import DOCXNamespace from calibre.ptempfile import PersistentTemporaryDirectory from calibre.utils.localization import canonicalize_lang from calibre.utils.logging import default_log @@ -25,7 +25,7 @@ def fromstring(raw, parser=RECOVER_PARSER): return etree.fromstring(raw, parser=parser) # Read metadata {{{ -def read_doc_props(raw, mi): +def read_doc_props(raw, mi, XPath): root = fromstring(raw) titles = XPath('//dc:title')(root) if titles: @@ -72,7 +72,7 @@ def read_app_props(raw, mi): if company and company[0].text and company[0].text.strip(): mi.publisher = company[0].text.strip() -def read_default_style_language(raw, mi): +def read_default_style_language(raw, mi, XPath): root = fromstring(raw) for lang in XPath('/w:styles/w:docDefaults/w:rPrDefault/w:rPr/w:lang/@w:val')(root): lang = canonicalize_lang(lang) @@ -84,6 +84,7 @@ def read_default_style_language(raw, mi): class DOCX(object): def __init__(self, path_or_stream, log=None, extract=True): + self.docx_is_transitional = True stream = path_or_stream if hasattr(path_or_stream, 'read') else open(path_or_stream, 'rb') self.name = getattr(stream, 'name', None) or '' self.log = log or default_log @@ -93,6 +94,7 @@ class DOCX(object): self.init_zipfile(stream) self.read_content_types() 
self.read_package_relationships() + self.namespace = DOCXNamespace(self.docx_is_transitional) def init_zipfile(self, stream): self.zipf = ZipFile(stream) @@ -158,12 +160,14 @@ class DOCX(object): for item in root.xpath('//*[local-name()="Relationships"]/*[local-name()="Relationship" and @Type and @Target]'): target = item.get('Target').lstrip('/') typ = item.get('Type') + if target == 'word/document.xml': + self.docx_is_transitional = typ != 'http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument' self.relationships[typ] = target self.relationships_rmap[target] = typ @property def document_name(self): - name = self.relationships.get(DOCUMENT, None) + name = self.relationships.get(self.namespace.names['DOCUMENT'], None) if name is None: names = tuple(n for n in self.names if n == 'document.xml' or n.endswith('/document.xml')) if not names: @@ -201,13 +205,13 @@ class DOCX(object): return by_id, by_type def get_document_properties_names(self): - name = self.relationships.get(DOCPROPS, None) + name = self.relationships.get(self.namespace.names['DOCPROPS'], None) if name is None: names = tuple(n for n in self.names if n.lower() == 'docprops/core.xml') if names: name = names[0] yield name - name = self.relationships.get(APPPROPS, None) + name = self.relationships.get(self.namespace.names['APPPROPS'], None) if name is None: names = tuple(n for n in self.names if n.lower() == 'docprops/app.xml') if names: @@ -224,16 +228,16 @@ class DOCX(object): except KeyError: pass else: - read_doc_props(raw, mi) + read_doc_props(raw, mi, self.namespace.XPath) if mi.is_null('language'): try: raw = self.read('word/styles.xml') except KeyError: pass else: - read_default_style_language(raw, mi) + read_default_style_language(raw, mi, self.namespace.XPath) - ap_name = self.relationships.get(APPPROPS, None) + ap_name = self.relationships.get(self.namespace.names['APPPROPS'], None) if ap_name: try: raw = self.read(ap_name) diff --git a/src/calibre/ebooks/docx/fields.py 
b/src/calibre/ebooks/docx/fields.py index 7e89247db3..8bc88815cb 100644 --- a/src/calibre/ebooks/docx/fields.py +++ b/src/calibre/ebooks/docx/fields.py @@ -9,7 +9,6 @@ __copyright__ = '2013, Kovid Goyal ' import re from calibre.ebooks.docx.index import process_index, polish_index_markup -from calibre.ebooks.docx.names import XPath, get, namespaces class Field(object): @@ -48,9 +47,6 @@ scanner = re.Scanner([ null = object() -def WORD(x): - return '{%s}%s' % (namespaces['w'], x) - def parser(name, field_map, default_field_name=None): field_map = dict((x.split(':') for x in field_map.split())) @@ -98,22 +94,23 @@ parse_noteref = parser('noteref', class Fields(object): - def __init__(self): + def __init__(self, namespace): + self.namespace = namespace self.fields = [] self.index_bookmark_counter = 0 self.index_bookmark_prefix = 'index-' def __call__(self, doc, log): - all_ids = frozenset(XPath('//*/@w:id')(doc)) + all_ids = frozenset(self.namespace.XPath('//*/@w:id')(doc)) c = 0 while self.index_bookmark_prefix in all_ids: c += 1 self.index_bookmark_prefix = self.index_bookmark_prefix.replace('-', '%d-' % c) stack = [] - for elem in XPath( + for elem in self.namespace.XPath( '//*[name()="w:p" or name()="w:r" or name()="w:instrText" or (name()="w:fldChar" and (@w:fldCharType="begin" or @w:fldCharType="end"))]')(doc): if elem.tag.endswith('}fldChar'): - typ = get(elem, 'w:fldCharType') + typ = self.namespace.get(elem, 'w:fldCharType') if typ == 'begin': stack.append(Field(elem)) self.fields.append(stack[-1]) @@ -193,6 +190,8 @@ class Fields(object): if xe: # We insert a synthetic bookmark around this index item so that we # can link to it later + def WORD(x): + return self.namespace.expand('w:' + x) self.index_bookmark_counter += 1 bmark = xe['anchor'] = '%s%d' % (self.index_bookmark_prefix, self.index_bookmark_counter) p = field.start.getparent() @@ -210,7 +209,7 @@ class Fields(object): if not field.contents: return idx = parse_func(field.instructions, log) - 
hyperlinks, blocks = process_index(field, idx, self.xe_fields, log) + hyperlinks, blocks = process_index(field, idx, self.xe_fields, log, self.namespace.XPath, self.namespace.expand) if not blocks: return for anchor, run in hyperlinks: diff --git a/src/calibre/ebooks/docx/fonts.py b/src/calibre/ebooks/docx/fonts.py index db7bef583e..cc6f8001e8 100644 --- a/src/calibre/ebooks/docx/fonts.py +++ b/src/calibre/ebooks/docx/fonts.py @@ -10,7 +10,6 @@ import os, re from collections import namedtuple from calibre.ebooks.docx.block_styles import binary_property, inherit -from calibre.ebooks.docx.names import XPath, get from calibre.utils.filenames import ascii_filename from calibre.utils.fonts.scanner import font_scanner, NoFonts from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font @@ -29,7 +28,7 @@ def get_variant(bold=False, italic=False): class Family(object): - def __init__(self, elem, embed_relationships): + def __init__(self, elem, embed_relationships, XPath, get): self.name = self.family_name = get(elem, 'w:name') self.alt_names = tuple(get(x, 'w:val') for x in XPath('./w:altName')(elem)) if self.alt_names and not has_system_fonts(self.name): @@ -51,7 +50,7 @@ class Family(object): for x in XPath('./w:family[@w:val]')(elem): self.generic_family = get(x, 'w:val', 'auto') - ntt = binary_property(elem, 'notTrueType') + ntt = binary_property(elem, 'notTrueType', XPath, get) self.is_ttf = ntt is inherit or not ntt self.panose1 = None @@ -73,13 +72,14 @@ class Family(object): class Fonts(object): - def __init__(self): + def __init__(self, namespace): + self.namespace = namespace self.fonts = {} self.used = set() def __call__(self, root, embed_relationships, docx, dest_dir): - for elem in XPath('//w:font[@w:name]')(root): - self.fonts[get(elem, 'w:name')] = Family(elem, embed_relationships) + for elem in self.namespace.XPath('//w:font[@w:name]')(root): + self.fonts[self.namespace.get(elem, 'w:name')] = Family(elem, embed_relationships, 
self.namespace.XPath, self.namespace.get) def family_for(self, name, bold=False, italic=False): f = self.fonts.get(name, None) diff --git a/src/calibre/ebooks/docx/footnotes.py b/src/calibre/ebooks/docx/footnotes.py index 023f56ece9..6acc506caf 100644 --- a/src/calibre/ebooks/docx/footnotes.py +++ b/src/calibre/ebooks/docx/footnotes.py @@ -8,42 +8,43 @@ __copyright__ = '2013, Kovid Goyal ' from collections import OrderedDict -from calibre.ebooks.docx.names import get, XPath, descendants - class Note(object): - def __init__(self, parent, rels): - self.type = get(parent, 'w:type', 'normal') + def __init__(self, namespace, parent, rels): + self.type = namespace.get(parent, 'w:type', 'normal') self.parent = parent self.rels = rels + self.namespace = namespace def __iter__(self): - for p in descendants(self.parent, 'w:p', 'w:tbl'): + for p in self.namespace.descendants(self.parent, 'w:p', 'w:tbl'): yield p class Footnotes(object): - def __init__(self): + def __init__(self, namespace): + self.namespace = namespace self.footnotes = {} self.endnotes = {} self.counter = 0 self.notes = OrderedDict() def __call__(self, footnotes, footnotes_rels, endnotes, endnotes_rels): + XPath, get = self.namespace.XPath, self.namespace.get if footnotes is not None: for footnote in XPath('./w:footnote[@w:id]')(footnotes): fid = get(footnote, 'w:id') if fid: - self.footnotes[fid] = Note(footnote, footnotes_rels) + self.footnotes[fid] = Note(self.namespace, footnote, footnotes_rels) if endnotes is not None: for endnote in XPath('./w:endnote[@w:id]')(endnotes): fid = get(endnote, 'w:id') if fid: - self.endnotes[fid] = Note(endnote, endnotes_rels) + self.endnotes[fid] = Note(self.namespace, endnote, endnotes_rels) def get_ref(self, ref): - fid = get(ref, 'w:id') + fid = self.namespace.get(ref, 'w:id') notes = self.footnotes if ref.tag.endswith('}footnoteReference') else self.endnotes note = notes.get(fid, None) if note is not None and note.type == 'normal': diff --git 
a/src/calibre/ebooks/docx/images.py b/src/calibre/ebooks/docx/images.py index 4c8584f3f3..fcee1e20f6 100644 --- a/src/calibre/ebooks/docx/images.py +++ b/src/calibre/ebooks/docx/images.py @@ -11,7 +11,7 @@ import os from lxml.html.builder import IMG, HR from calibre.constants import iswindows -from calibre.ebooks.docx.names import XPath, get, barename +from calibre.ebooks.docx.names import barename from calibre.utils.filenames import ascii_filename from calibre.utils.imghdr import what @@ -27,7 +27,7 @@ def emu_to_pt(x): def pt_to_emu(x): return int(x * 12700) -def get_image_properties(parent): +def get_image_properties(parent, XPath, get): width = height = None for extent in XPath('./wp:extent')(parent): try: @@ -67,7 +67,7 @@ def get_image_margins(elem): ans['padding-%s' % css] = '%.3gpt' % val return ans -def get_hpos(anchor, page_width): +def get_hpos(anchor, page_width, XPath, get): for ph in XPath('./wp:positionH')(anchor): rp = ph.get('relativeFrom', None) if rp == 'leftMargin': @@ -101,7 +101,8 @@ def get_hpos(anchor, page_width): class Images(object): - def __init__(self, log): + def __init__(self, namespace, log): + self.namespace = namespace self.rid_map = {} self.used = {} self.names = set() @@ -158,6 +159,7 @@ class Images(object): return name def pic_to_img(self, pic, alt, parent): + XPath, get = self.namespace.XPath, self.namespace.get name = None link = None for hl in XPath('descendant::a:hlinkClick[@r:id]')(parent): @@ -191,9 +193,10 @@ class Images(object): return img def drawing_to_html(self, drawing, page): + XPath, get = self.namespace.XPath, self.namespace.get # First process the inline pictures for inline in XPath('./wp:inline')(drawing): - style, alt = get_image_properties(inline) + style, alt = get_image_properties(inline, XPath, get) for pic in XPath('descendant::pic:pic')(inline): ans = self.pic_to_img(pic, alt, inline) if ans is not None: @@ -203,7 +206,7 @@ class Images(object): # Now process the floats for anchor in 
XPath('./wp:anchor')(drawing): - style, alt = get_image_properties(anchor) + style, alt = get_image_properties(anchor, XPath, get) self.get_float_properties(anchor, style, page) for pic in XPath('descendant::pic:pic')(anchor): ans = self.pic_to_img(pic, alt, anchor) @@ -213,6 +216,7 @@ class Images(object): yield ans def pict_to_html(self, pict, page): + XPath, get = self.namespace.XPath, self.namespace.get # First see if we have an <hr> is_hr = len(pict) == 1 and get(pict[0], 'o:hr') in {'t', 'true'} if is_hr: @@ -247,6 +251,7 @@ class Images(object): yield img def get_float_properties(self, anchor, style, page): + XPath, get = self.namespace.XPath, self.namespace.get if 'display' not in style: style['display'] = 'block' padding = get_image_margins(anchor) @@ -257,7 +262,7 @@ class Images(object): # Ignore margins page_width = page.width - hpos = get_hpos(anchor, page_width) + width/(2*page_width) + hpos = get_hpos(anchor, page_width, XPath, get) + width/(2*page_width) wrap_elem = None dofloat = False diff --git a/src/calibre/ebooks/docx/index.py b/src/calibre/ebooks/docx/index.py index 605553c0a1..13bc9242cb 100644 --- a/src/calibre/ebooks/docx/index.py +++ b/src/calibre/ebooks/docx/index.py @@ -10,10 +10,9 @@ from operator import itemgetter from lxml import etree -from calibre.ebooks.docx.names import XPath, expand from calibre.utils.icu import partition_by_first_letter, sort_key -def get_applicable_xe_fields(index, xe_fields): +def get_applicable_xe_fields(index, xe_fields, XPath, expand): iet = index.get('entry-type', None) xe_fields = [xe for xe in xe_fields if xe.get('entry-type', None) == iet] @@ -40,7 +39,7 @@ def get_applicable_xe_fields(index, xe_fields): return [xe for xe in xe_fields if contained(xe)] -def make_block(style, parent, pos): +def make_block(expand, style, parent, pos): p = parent.makeelement(expand('w:p')) parent.insert(pos, p) if style is not None: @@ -56,7 +55,7 @@ def make_block(style, parent, pos): r.append(t) return p, t -def add_xe(xe, t): +def add_xe(xe, t, expand): text = xe.get('text', '') pt = xe.get('page-number-text', None) t.text = text or ' ' @@ -70,7 +69,7 @@ def add_xe(xe, t): r.append(t2) return xe['anchor'], t.getparent() -def process_index(field, index, xe_fields, log): +def process_index(field, index, xe_fields, log, XPath, expand): ''' We remove all the word generated index markup and replace it with our own that is more suitable for an ebook. 
@@ -89,7 +88,7 @@ def process_index(field, index, xe_fields, log): start_pos = (p, p.index(elem)) p.remove(elem) - xe_fields = get_applicable_xe_fields(index, xe_fields) + xe_fields = get_applicable_xe_fields(index, xe_fields, XPath, expand) if not xe_fields: return [], [] if heading_text is not None: @@ -107,14 +106,14 @@ def process_index(field, index, xe_fields, log): for item in reversed(items): is_heading = not isinstance(item, dict) style = heading_style if is_heading else None - p, t = make_block(style, *start_pos) + p, t = make_block(expand, style, *start_pos) if is_heading: text = heading_text if text.lower().startswith('a'): text = item + text[1:] t.text = text else: - hyperlinks.append(add_xe(item, t)) + hyperlinks.append(add_xe(item, t, expand)) blocks.append(p) return hyperlinks, blocks diff --git a/src/calibre/ebooks/docx/names.py b/src/calibre/ebooks/docx/names.py index 90f8ae6250..afa74f1f55 100644 --- a/src/calibre/ebooks/docx/names.py +++ b/src/calibre/ebooks/docx/names.py @@ -12,22 +12,25 @@ from lxml.etree import XPath as X from calibre.utils.filenames import ascii_text -DOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument' -DOCPROPS = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties' -APPPROPS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties' -STYLES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles' -NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering' -FONTS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable' -EMBEDDED_FONT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/font' -IMAGES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image' -LINKS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink' -FOOTNOTES = 
'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes' -ENDNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes' -THEMES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme' -SETTINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings' -WEB_SETTINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings' +# Names {{{ +TRANSITIONAL_NAMES = { + 'DOCUMENT' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument', + 'DOCPROPS' : 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties', + 'APPPROPS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties', + 'STYLES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles', + 'NUMBERING' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering', + 'FONTS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable', + 'EMBEDDED_FONT' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/font', + 'IMAGES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image', + 'LINKS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink', + 'FOOTNOTES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes', + 'ENDNOTES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes', + 'THEMES' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme', + 'SETTINGS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings', + 'WEB_SETTINGS' : 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings', +} -namespaces = { +TRANSITIONAL_NAMESPACES = { 'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main', 'o': 'urn:schemas-microsoft-com:office:office', 
've': 'http://schemas.openxmlformats.org/markup-compatibility/2006', @@ -57,40 +60,13 @@ namespaces = { 'dcmitype': 'http://purl.org/dc/dcmitype/', 'dcterms': 'http://purl.org/dc/terms/' } - -xpath_cache = {} - -def XPath(expr): - ans = xpath_cache.get(expr, None) - if ans is None: - xpath_cache[expr] = ans = X(expr, namespaces=namespaces) - return ans - -def is_tag(x, q): - tag = getattr(x, 'tag', x) - ns, name = q.partition(':')[0::2] - return '{%s}%s' % (namespaces.get(ns, None), name) == tag +# }}} def barename(x): return x.rpartition('}')[-1] def XML(x): - return '{%s}%s' % (namespaces['xml'], x) - -def expand(name, sep=':'): - ns, tag = name.partition(sep)[::2] - if ns and tag: - tag = '{%s}%s' % (namespaces[ns], tag) - return tag or ns - -def get(x, attr, default=None): - return x.attrib.get(expand(attr), default) - -def ancestor(elem, name): - try: - return XPath('ancestor::%s[1]' % name)(elem)[0] - except IndexError: - return None + return '{%s}%s' % (TRANSITIONAL_NAMESPACES['xml'], x) def generate_anchor(name, existing): x = y = 'id_' + re.sub(r'[^0-9a-zA-Z_]', '', ascii_text(name)).lstrip('_') @@ -100,14 +76,48 @@ def generate_anchor(name, existing): c += 1 return y -def children(elem, *args): - return XPath('|'.join('child::%s' % a for a in args))(elem) +class DOCXNamespace(object): -def descendants(elem, *args): - return XPath('|'.join('descendant::%s' % a for a in args))(elem) + def __init__(self, transitional=True): + self.xpath_cache = {} + if transitional: + self.namespaces = TRANSITIONAL_NAMESPACES.copy() + self.names = TRANSITIONAL_NAMES.copy() -def makeelement(root, tag, append=True, **attrs): - ans = root.makeelement(expand(tag), **{expand(k, sep='_'):v for k, v in attrs.iteritems()}) - if append: - root.append(ans) - return ans + def XPath(self, expr): + ans = self.xpath_cache.get(expr, None) + if ans is None: + self.xpath_cache[expr] = ans = X(expr, namespaces=self.namespaces) + return ans + + def is_tag(self, x, q): + tag = getattr(x, 'tag', 
x) + ns, name = q.partition(':')[0::2] + return '{%s}%s' % (self.namespaces.get(ns, None), name) == tag + + def expand(self, name, sep=':'): + ns, tag = name.partition(sep)[::2] + if ns and tag: + tag = '{%s}%s' % (self.namespaces[ns], tag) + return tag or ns + + def get(self, x, attr, default=None): + return x.attrib.get(self.expand(attr), default) + + def ancestor(self, elem, name): + try: + return self.XPath('ancestor::%s[1]' % name)(elem)[0] + except IndexError: + return None + + def children(self, elem, *args): + return self.XPath('|'.join('child::%s' % a for a in args))(elem) + + def descendants(self, elem, *args): + return self.XPath('|'.join('descendant::%s' % a for a in args))(elem) + + def makeelement(self, root, tag, append=True, **attrs): + ans = root.makeelement(self.expand(tag), **{self.expand(k, sep='_'):v for k, v in attrs.iteritems()}) + if append: + root.append(ans) + return ans diff --git a/src/calibre/ebooks/docx/numbering.py b/src/calibre/ebooks/docx/numbering.py index 25a3b4026c..a9e9654db9 100644 --- a/src/calibre/ebooks/docx/numbering.py +++ b/src/calibre/ebooks/docx/numbering.py @@ -13,7 +13,6 @@ from lxml.html.builder import OL, UL, SPAN from calibre.ebooks.docx.block_styles import ParagraphStyle from calibre.ebooks.docx.char_styles import RunStyle, inherit -from calibre.ebooks.docx.names import XPath, get STYLE_MAP = { 'aiueo': 'hiragana', @@ -32,7 +31,8 @@ STYLE_MAP = { class Level(object): - def __init__(self, lvl=None): + def __init__(self, namespace, lvl=None): + self.namespace = namespace self.restart = None self.start = 0 self.fmt = 'decimal' @@ -47,7 +47,7 @@ class Level(object): self.read_from_xml(lvl) def copy(self): - ans = Level() + ans = Level(self.namespace) for x in ('restart', 'pic_id', 'start', 'fmt', 'para_link', 'paragraph_style', 'character_style', 'is_numbered', 'num_template', 'bullet_template'): setattr(ans, x, getattr(self, x)) return ans @@ -61,6 +61,7 @@ class Level(object): return re.sub(r'%(\d+)', sub, 
template).rstrip() + '\xa0' def read_from_xml(self, lvl, override=False): + XPath, get = self.namespace.XPath, self.namespace.get for lr in XPath('./w:lvlRestart[@w:val]')(lvl): try: self.restart = int(get(lr, 'w:val')) @@ -74,7 +75,7 @@ class Level(object): pass for rPr in XPath('./w:rPr')(lvl): - ps = RunStyle(rPr) + ps = RunStyle(self.namespace, rPr) if self.character_style is None: self.character_style = ps else: @@ -106,7 +107,7 @@ class Level(object): self.para_link = get(lr, 'w:val') for pPr in XPath('./w:pPr')(lvl): - ps = ParagraphStyle(pPr) + ps = ParagraphStyle(self.namespace, pPr) if self.paragraph_style is None: self.paragraph_style = ps else: @@ -135,7 +136,9 @@ class Level(object): class NumberingDefinition(object): - def __init__(self, parent=None, an_id=None): + def __init__(self, namespace, parent=None, an_id=None): + self.namespace = namespace + XPath, get = self.namespace.XPath, self.namespace.get self.levels = {} self.abstract_numbering_definition_id = an_id if parent is not None: @@ -144,17 +147,18 @@ class NumberingDefinition(object): ilvl = int(get(lvl, 'w:ilvl', 0)) except (TypeError, ValueError): ilvl = 0 - self.levels[ilvl] = Level(lvl) + self.levels[ilvl] = Level(namespace, lvl) def copy(self): - ans = NumberingDefinition(an_id=self.abstract_numbering_definition_id) + ans = NumberingDefinition(self.namespace, an_id=self.abstract_numbering_definition_id) for l, lvl in self.levels.iteritems(): ans.levels[l] = lvl.copy() return ans class Numbering(object): - def __init__(self): + def __init__(self, namespace): + self.namespace = namespace self.definitions = {} self.instances = {} self.counters = defaultdict(Counter) @@ -163,6 +167,7 @@ class Numbering(object): def __call__(self, root, styles, rid_map): ' Read all numbering style definitions ' + XPath, get = self.namespace.XPath, self.namespace.get self.rid_map = rid_map for npb in XPath('./w:numPicBullet[@w:numPicBulletId]')(root): npbid = get(npb, 'w:numPicBulletId') @@ -176,7 +181,7 @@ 
class Numbering(object): if nsl: lazy_load[an_id] = get(nsl[0], 'w:val') else: - nd = NumberingDefinition(an, an_id=an_id) + nd = NumberingDefinition(self.namespace, an, an_id=an_id) self.definitions[an_id] = nd def create_instance(n, definition): @@ -199,7 +204,7 @@ class Numbering(object): ilvl = nilvl if ilvl is None else ilvl alvl = nd.levels.get(ilvl, None) if alvl is None: - alvl = Level() + alvl = Level(self.namespace) alvl.read_from_xml(lvl, override=True) for ilvl, so in start_overrides.iteritems(): try: diff --git a/src/calibre/ebooks/docx/settings.py b/src/calibre/ebooks/docx/settings.py index b496dd0abe..dbd76b57e9 100644 --- a/src/calibre/ebooks/docx/settings.py +++ b/src/calibre/ebooks/docx/settings.py @@ -6,17 +6,16 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' -from calibre.ebooks.docx.names import XPath, get - class Settings(object): - def __init__(self): + def __init__(self, namespace): self.default_tab_stop = 720 / 20 + self.namespace = namespace def __call__(self, root): - for dts in XPath('//w:defaultTabStop[@w:val]')(root): + for dts in self.namespace.XPath('//w:defaultTabStop[@w:val]')(root): try: - self.default_tab_stop = int(get(dts, 'w:val')) / 20 + self.default_tab_stop = int(self.namespace.get(dts, 'w:val')) / 20 except (ValueError, TypeError, AttributeError): pass diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py index edaaf73162..ba1900aafd 100644 --- a/src/calibre/ebooks/docx/styles.py +++ b/src/calibre/ebooks/docx/styles.py @@ -12,7 +12,6 @@ from collections import OrderedDict, Counter from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit from calibre.ebooks.docx.char_styles import RunStyle from calibre.ebooks.docx.tables import TableStyle -from calibre.ebooks.docx.names import XPath, get class PageProperties(object): @@ -21,12 +20,12 @@ class PageProperties(object): sectPr elements. 
''' - def __init__(self, elems=()): + def __init__(self, namespace, elems=()): self.width = self.height = 595.28, 841.89 # pts, A4 self.margin_left = self.margin_right = 72 # pts for sectPr in elems: - for pgSz in XPath('./w:pgSz')(sectPr): - w, h = get(pgSz, 'w:w'), get(pgSz, 'w:h') + for pgSz in namespace.XPath('./w:pgSz')(sectPr): + w, h = namespace.get(pgSz, 'w:w'), namespace.get(pgSz, 'w:h') try: self.width = int(w)/20 except (ValueError, TypeError): @@ -35,8 +34,8 @@ class PageProperties(object): self.height = int(h)/20 except (ValueError, TypeError): pass - for pgMar in XPath('./w:pgMar')(sectPr): - l, r = get(pgMar, 'w:left'), get(pgMar, 'w:right') + for pgMar in namespace.XPath('./w:pgMar')(sectPr): + l, r = namespace.get(pgMar, 'w:left'), namespace.get(pgMar, 'w:right') try: self.margin_left = int(l)/20 except (ValueError, TypeError): @@ -52,41 +51,41 @@ class Style(object): Class representing a element. Can contain block, character, etc. styles. ''' - name_path = XPath('./w:name[@w:val]') - based_on_path = XPath('./w:basedOn[@w:val]') - - def __init__(self, elem): + def __init__(self, namespace, elem): + self.namespace = namespace + self.name_path = namespace.XPath('./w:name[@w:val]') + self.based_on_path = namespace.XPath('./w:basedOn[@w:val]') self.resolved = False - self.style_id = get(elem, 'w:styleId') - self.style_type = get(elem, 'w:type') + self.style_id = namespace.get(elem, 'w:styleId') + self.style_type = namespace.get(elem, 'w:type') names = self.name_path(elem) - self.name = get(names[-1], 'w:val') if names else None + self.name = namespace.get(names[-1], 'w:val') if names else None based_on = self.based_on_path(elem) - self.based_on = get(based_on[0], 'w:val') if based_on else None + self.based_on = namespace.get(based_on[0], 'w:val') if based_on else None if self.style_type == 'numbering': self.based_on = None - self.is_default = get(elem, 'w:default') in {'1', 'on', 'true'} + self.is_default = namespace.get(elem, 'w:default') in {'1', 
'on', 'true'} self.paragraph_style = self.character_style = self.table_style = None if self.style_type in {'paragraph', 'character', 'table'}: if self.style_type == 'table': - for tblPr in XPath('./w:tblPr')(elem): - ts = TableStyle(tblPr) + for tblPr in namespace.XPath('./w:tblPr')(elem): + ts = TableStyle(namespace, tblPr) if self.table_style is None: self.table_style = ts else: self.table_style.update(ts) if self.style_type in {'paragraph', 'table'}: - for pPr in XPath('./w:pPr')(elem): - ps = ParagraphStyle(pPr) + for pPr in namespace.XPath('./w:pPr')(elem): + ps = ParagraphStyle(namespace, pPr) if self.paragraph_style is None: self.paragraph_style = ps else: self.paragraph_style.update(ps) - for rPr in XPath('./w:rPr')(elem): - rs = RunStyle(rPr) + for rPr in namespace.XPath('./w:rPr')(elem): + rs = RunStyle(namespace, rPr) if self.character_style is None: self.character_style = rs else: @@ -94,21 +93,21 @@ class Style(object): if self.style_type in {'numbering', 'paragraph'}: self.numbering_style_link = None - for x in XPath('./w:pPr/w:numPr/w:numId[@w:val]')(elem): - self.numbering_style_link = get(x, 'w:val') + for x in namespace.XPath('./w:pPr/w:numPr/w:numId[@w:val]')(elem): + self.numbering_style_link = namespace.get(x, 'w:val') def resolve_based_on(self, parent): if parent.table_style is not None: if self.table_style is None: - self.table_style = TableStyle() + self.table_style = TableStyle(self.namespace) self.table_style.resolve_based_on(parent.table_style) if parent.paragraph_style is not None: if self.paragraph_style is None: - self.paragraph_style = ParagraphStyle() + self.paragraph_style = ParagraphStyle(self.namespace) self.paragraph_style.resolve_based_on(parent.paragraph_style) if parent.character_style is not None: if self.character_style is None: - self.character_style = RunStyle() + self.character_style = RunStyle(self.namespace) self.character_style.resolve_based_on(parent.character_style) @@ -118,7 +117,8 @@ class Styles(object): 
Collection of all styles defined in the document. Used to get the final styles applicable to elements in the document markup. ''' - def __init__(self, tables): + def __init__(self, namespace, tables): + self.namespace = namespace self.id_map = OrderedDict() self.para_cache = {} self.para_char_cache = {} @@ -144,8 +144,8 @@ class Styles(object): def __call__(self, root, fonts, theme): self.fonts, self.theme = fonts, theme - for s in XPath('//w:style')(root): - s = Style(s) + for s in self.namespace.XPath('//w:style')(root): + s = Style(self.namespace, s) if s.style_id: self.id_map[s.style_id] = s if s.is_default: @@ -155,17 +155,17 @@ class Styles(object): self.default_paragraph_style = self.default_character_style = None - for dd in XPath('./w:docDefaults')(root): - for pd in XPath('./w:pPrDefault')(dd): - for pPr in XPath('./w:pPr')(pd): - ps = ParagraphStyle(pPr) + for dd in self.namespace.XPath('./w:docDefaults')(root): + for pd in self.namespace.XPath('./w:pPrDefault')(dd): + for pPr in self.namespace.XPath('./w:pPr')(pd): + ps = ParagraphStyle(self.namespace, pPr) if self.default_paragraph_style is None: self.default_paragraph_style = ps else: self.default_paragraph_style.update(ps) - for pd in XPath('./w:rPrDefault')(dd): - for pPr in XPath('./w:rPr')(pd): - ps = RunStyle(pPr) + for pd in self.namespace.XPath('./w:rPrDefault')(dd): + for pPr in self.namespace.XPath('./w:rPr')(pd): + ps = RunStyle(self.namespace, pPr) if self.default_character_style is None: self.default_character_style = ps else: @@ -213,18 +213,18 @@ class Styles(object): ans = self.para_cache.get(p, None) if ans is None: linked_style = None - ans = self.para_cache[p] = ParagraphStyle() + ans = self.para_cache[p] = ParagraphStyle(self.namespace) ans.style_name = None direct_formatting = None - for pPr in XPath('./w:pPr')(p): - ps = ParagraphStyle(pPr) + for pPr in self.namespace.XPath('./w:pPr')(p): + ps = ParagraphStyle(self.namespace, pPr) if direct_formatting is None: direct_formatting = 
ps else: direct_formatting.update(ps) if direct_formatting is None: - direct_formatting = ParagraphStyle() + direct_formatting = ParagraphStyle(self.namespace) parent_styles = [] if self.default_paragraph_style is not None: parent_styles.append(self.default_paragraph_style) @@ -275,19 +275,19 @@ class Styles(object): def resolve_run(self, r): ans = self.run_cache.get(r, None) if ans is None: - p = XPath('ancestor::w:p[1]')(r) + p = self.namespace.XPath('ancestor::w:p[1]')(r) p = p[0] if p else None - ans = self.run_cache[r] = RunStyle() + ans = self.run_cache[r] = RunStyle(self.namespace) direct_formatting = None - for rPr in XPath('./w:rPr')(r): - rs = RunStyle(rPr) + for rPr in self.namespace.XPath('./w:rPr')(r): + rs = RunStyle(self.namespace, rPr) if direct_formatting is None: direct_formatting = rs else: direct_formatting.update(rs) if direct_formatting is None: - direct_formatting = RunStyle() + direct_formatting = RunStyle(self.namespace) parent_styles = [] default_char = self.default_styles.get('character', None) @@ -484,5 +484,3 @@ class Styles(object): b = '\n'.join(b) ans.append('.%s {\n%s\n}\n' % (cls, b.rstrip(';'))) return prefix + '\n' + '\n'.join(ans) - - diff --git a/src/calibre/ebooks/docx/tables.py b/src/calibre/ebooks/docx/tables.py index b250bcf633..7da13a0e41 100644 --- a/src/calibre/ebooks/docx/tables.py +++ b/src/calibre/ebooks/docx/tables.py @@ -10,13 +10,12 @@ from lxml.html.builder import TABLE, TR, TD from calibre.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle from calibre.ebooks.docx.char_styles import RunStyle -from calibre.ebooks.docx.names import XPath, get, is_tag # Read from XML {{{ read_shd = rs edges = ('left', 'top', 'right', 'bottom') -def _read_width(elem): +def _read_width(elem, get): ans = inherit try: w = int(get(elem, 'w:w')) @@ -33,29 +32,29 @@ def _read_width(elem): ans = '%.3g%%' % (w/50) return ans -def read_width(parent, dest): +def 
read_width(parent, dest, XPath, get): ans = inherit for tblW in XPath('./w:tblW')(parent): - ans = _read_width(tblW) + ans = _read_width(tblW, get) setattr(dest, 'width', ans) -def read_cell_width(parent, dest): +def read_cell_width(parent, dest, XPath, get): ans = inherit for tblW in XPath('./w:tcW')(parent): - ans = _read_width(tblW) + ans = _read_width(tblW, get) setattr(dest, 'width', ans) -def read_padding(parent, dest): +def read_padding(parent, dest, XPath, get): name = 'tblCellMar' if parent.tag.endswith('}tblPr') else 'tcMar' ans = {x:inherit for x in edges} for mar in XPath('./w:%s' % name)(parent): for x in edges: for edge in XPath('./w:%s' % x)(mar): - ans[x] = _read_width(edge) + ans[x] = _read_width(edge, get) for x in edges: setattr(dest, 'cell_padding_%s' % x, ans[x]) -def read_justification(parent, dest): +def read_justification(parent, dest, XPath, get): left = right = inherit for jc in XPath('./w:jc[@w:val]')(parent): val = get(jc, 'w:val') @@ -70,31 +69,31 @@ def read_justification(parent, dest): setattr(dest, 'margin_left', left) setattr(dest, 'margin_right', right) -def read_spacing(parent, dest): +def read_spacing(parent, dest, XPath, get): ans = inherit for cs in XPath('./w:tblCellSpacing')(parent): - ans = _read_width(cs) + ans = _read_width(cs, get) setattr(dest, 'spacing', ans) -def read_float(parent, dest): +def read_float(parent, dest, XPath, get): ans = inherit for x in XPath('./w:tblpPr')(parent): ans = {k.rpartition('}')[-1]: v for k, v in x.attrib.iteritems()} setattr(dest, 'float', ans) -def read_indent(parent, dest): +def read_indent(parent, dest, XPath, get): ans = inherit for cs in XPath('./w:tblInd')(parent): - ans = _read_width(cs) + ans = _read_width(cs, get) setattr(dest, 'indent', ans) border_edges = ('left', 'top', 'right', 'bottom', 'insideH', 'insideV') -def read_borders(parent, dest): +def read_borders(parent, dest, XPath, get): name = 'tblBorders' if parent.tag.endswith('}tblPr') else 'tcBorders' - read_border(parent, 
dest, border_edges, name) + read_border(parent, dest, XPath, get, border_edges, name) -def read_height(parent, dest): +def read_height(parent, dest, XPath, get): ans = inherit for rh in XPath('./w:trHeight')(parent): rule = get(rh, 'w:hRule', 'auto') @@ -103,14 +102,14 @@ def read_height(parent, dest): ans = (rule, val) setattr(dest, 'height', ans) -def read_vertical_align(parent, dest): +def read_vertical_align(parent, dest, XPath, get): ans = inherit for va in XPath('./w:vAlign')(parent): val = get(va, 'w:val') ans = {'center': 'middle', 'top': 'top', 'bottom': 'bottom'}.get(val, 'middle') setattr(dest, 'vertical_align', ans) -def read_col_span(parent, dest): +def read_col_span(parent, dest, XPath, get): ans = inherit for gs in XPath('./w:gridSpan')(parent): try: @@ -119,14 +118,14 @@ def read_col_span(parent, dest): continue setattr(dest, 'col_span', ans) -def read_merge(parent, dest): +def read_merge(parent, dest, XPath, get): for x in ('hMerge', 'vMerge'): ans = inherit for m in XPath('./w:%s' % x)(parent): ans = get(m, 'w:val', 'continue') setattr(dest, x, ans) -def read_band_size(parent, dest): +def read_band_size(parent, dest, XPath, get): for x in ('Col', 'Row'): ans = 1 for y in XPath('./w:tblStyle%sBandSize' % x)(parent): @@ -136,7 +135,7 @@ def read_band_size(parent, dest): continue setattr(dest, '%s_band_size' % x.lower(), ans) -def read_look(parent, dest): +def read_look(parent, dest, XPath, get): ans = 0 for x in XPath('./w:tblLook')(parent): try: @@ -148,8 +147,10 @@ def read_look(parent, dest): # }}} def clone(style): + if style is None: + return None try: - ans = type(style)() + ans = type(style)(style.namespace) except TypeError: return None ans.update(style) @@ -190,16 +191,17 @@ class RowStyle(Style): all_properties = ('height', 'cantSplit', 'hidden', 'spacing',) - def __init__(self, trPr=None): + def __init__(self, namespace, trPr=None): + self.namespace = namespace if trPr is None: for p in self.all_properties: setattr(self, p, inherit) else: 
for p in ('hidden', 'cantSplit'): - setattr(self, p, binary_property(trPr, p)) + setattr(self, p, binary_property(trPr, p, namespace.XPath, namespace.get)) for p in ('spacing', 'height'): f = globals()['read_%s' % p] - f(trPr, self) + f(trPr, self, namespace.XPath, namespace.get) self._css = None @property @@ -226,14 +228,15 @@ class CellStyle(Style): 'cell_padding_bottom', 'width', 'vertical_align', 'col_span', 'vMerge', 'hMerge', 'row_span', ) + tuple(k % edge for edge in border_edges for k in border_props) - def __init__(self, tcPr=None): + def __init__(self, namespace, tcPr=None): + self.namespace = namespace if tcPr is None: for p in self.all_properties: setattr(self, p, inherit) else: for x in ('borders', 'shd', 'padding', 'cell_width', 'vertical_align', 'col_span', 'merge'): f = globals()['read_%s' % x] - f(tcPr, self) + f(tcPr, self, namespace.XPath, namespace.get) self.row_span = inherit self._css = None @@ -270,7 +273,8 @@ class TableStyle(Style): 'spacing', 'indent', 'overrides', 'col_band_size', 'row_band_size', 'look', ) + tuple(k % edge for edge in border_edges for k in border_props) - def __init__(self, tblPr=None): + def __init__(self, namespace, tblPr=None): + self.namespace = namespace if tblPr is None: for p in self.all_properties: setattr(self, p, inherit) @@ -278,23 +282,23 @@ class TableStyle(Style): self.overrides = inherit for x in ('width', 'float', 'padding', 'shd', 'justification', 'spacing', 'indent', 'borders', 'band_size', 'look'): f = globals()['read_%s' % x] - f(tblPr, self) + f(tblPr, self, self.namespace.XPath, self.namespace.get) parent = tblPr.getparent() - if is_tag(parent, 'w:style'): + if self.namespace.is_tag(parent, 'w:style'): self.overrides = {} - for tblStylePr in XPath('./w:tblStylePr[@w:type]')(parent): - otype = get(tblStylePr, 'w:type') + for tblStylePr in self.namespace.XPath('./w:tblStylePr[@w:type]')(parent): + otype = self.namespace.get(tblStylePr, 'w:type') orides = self.overrides[otype] = {} - for tblPr in 
XPath('./w:tblPr')(tblStylePr): - orides['table'] = TableStyle(tblPr) - for trPr in XPath('./w:trPr')(tblStylePr): - orides['row'] = RowStyle(trPr) - for tcPr in XPath('./w:tcPr')(tblStylePr): - orides['cell'] = CellStyle(tcPr) - for pPr in XPath('./w:pPr')(tblStylePr): - orides['para'] = ParagraphStyle(pPr) - for rPr in XPath('./w:rPr')(tblStylePr): - orides['run'] = RunStyle(rPr) + for tblPr in self.namespace.XPath('./w:tblPr')(tblStylePr): + orides['table'] = TableStyle(self.namespace, tblPr) + for trPr in self.namespace.XPath('./w:trPr')(tblStylePr): + orides['row'] = RowStyle(self.namespace, trPr) + for tcPr in self.namespace.XPath('./w:tcPr')(tblStylePr): + orides['cell'] = CellStyle(self.namespace, tcPr) + for pPr in self.namespace.XPath('./w:pPr')(tblStylePr): + orides['para'] = ParagraphStyle(self.namespace, pPr) + for rPr in self.namespace.XPath('./w:rPr')(tblStylePr): + orides['run'] = RunStyle(self.namespace, rPr) self._css = None def resolve_based_on(self, parent): @@ -343,16 +347,17 @@ class TableStyle(Style): class Table(object): - def __init__(self, tbl, styles, para_map, is_sub_table=False): + def __init__(self, namespace, tbl, styles, para_map, is_sub_table=False): + self.namespace = namespace self.tbl = tbl self.styles = styles self.is_sub_table = is_sub_table # Read Table Style - style = {'table':TableStyle()} - for tblPr in XPath('./w:tblPr')(tbl): - for ts in XPath('./w:tblStyle[@w:val]')(tblPr): - style_id = get(ts, 'w:val') + style = {'table':TableStyle(self.namespace)} + for tblPr in self.namespace.XPath('./w:tblPr')(tbl): + for ts in self.namespace.XPath('./w:tblStyle[@w:val]')(tblPr): + style_id = self.namespace.get(ts, 'w:val') s = styles.get(style_id) if s is not None: if s.table_style is not None: @@ -367,7 +372,7 @@ class Table(object): style['run'].update(s.character_style) else: style['run'] = s.character_style - style['table'].update(TableStyle(tblPr)) + style['table'].update(TableStyle(self.namespace, tblPr)) self.table_style, 
self.paragraph_style = style['table'], style.get('paragraph', None) self.run_style = style.get('run', None) self.overrides = self.table_style.overrides @@ -380,23 +385,23 @@ class Table(object): self.paragraphs = [] self.cell_map = [] - rows = XPath('./w:tr')(tbl) + rows = self.namespace.XPath('./w:tr')(tbl) for r, tr in enumerate(rows): overrides = self.get_overrides(r, None, len(rows), None) self.resolve_row_style(tr, overrides) - cells = XPath('./w:tc')(tr) + cells = self.namespace.XPath('./w:tc')(tr) self.cell_map.append([]) for c, tc in enumerate(cells): overrides = self.get_overrides(r, c, len(rows), len(cells)) self.resolve_cell_style(tc, overrides, r, c, len(rows), len(cells)) self.cell_map[-1].append(tc) - for p in XPath('./w:p')(tc): + for p in self.namespace.XPath('./w:p')(tc): para_map[p] = self self.paragraphs.append(p) self.resolve_para_style(p, overrides) self.handle_merged_cells() - self.sub_tables = {x:Table(x, styles, para_map, is_sub_table=True) for x in XPath('./w:tr/w:tc/w:tbl')(tbl)} + self.sub_tables = {x:Table(namespace, x, styles, para_map, is_sub_table=True) for x in self.namespace.XPath('./w:tr/w:tc/w:tbl')(tbl)} def override_allowed(self, name): 'Check if the named override is allowed by the tblLook element' @@ -449,7 +454,7 @@ class Table(object): return tuple(filter(self.override_allowed, overrides)) def resolve_row_style(self, tr, overrides): - rs = RowStyle() + rs = RowStyle(self.namespace) for o in overrides: if o in self.overrides: ovr = self.overrides[o] @@ -457,12 +462,12 @@ class Table(object): if ors is not None: rs.update(ors) - for trPr in XPath('./w:trPr')(tr): - rs.update(RowStyle(trPr)) + for trPr in self.namespace.XPath('./w:trPr')(tr): + rs.update(RowStyle(self.namespace, trPr)) self.style_map[tr] = rs def resolve_cell_style(self, tc, overrides, row, col, rows, cols_in_row): - cs = CellStyle() + cs = CellStyle(self.namespace) # from lxml.etree import tostring # txt = tostring(tc, method='text', encoding=unicode) for o in 
overrides: @@ -472,8 +477,8 @@ class Table(object): if ors is not None: cs.update(ors) - for tcPr in XPath('./w:tcPr')(tc): - cs.update(CellStyle(tcPr)) + for tcPr in self.namespace.XPath('./w:tcPr')(tc): + cs.update(CellStyle(self.namespace, tcPr)) for x in edges: p = 'cell_padding_%s' % x @@ -535,7 +540,7 @@ class Table(object): try: s = self.style_map[cell] except KeyError: # cell is None - s = CellStyle() + s = CellStyle(self.namespace) if s.vMerge == 'restart': runs.append([cell]) elif s.vMerge == 'continue': @@ -555,7 +560,7 @@ class Table(object): try: s = self.style_map[cell] except KeyError: # cell is None - s = CellStyle() + s = CellStyle(self.namespace) if s.col_span is not inherit: runs.append([]) continue @@ -593,12 +598,12 @@ class Table(object): parent.insert(idx, table) else: parent.append(table) - for row in XPath('./w:tr')(self.tbl): + for row in self.namespace.XPath('./w:tr')(self.tbl): tr = TR('\n\t\t\t') style_map[tr] = self.style_map[row] tr.tail = '\n\t\t' table.append(tr) - for tc in XPath('./w:tc')(row): + for tc in self.namespace.XPath('./w:tc')(row): td = TD() style_map[td] = s = self.style_map[tc] if s.col_span is not inherit: @@ -607,7 +612,7 @@ class Table(object): td.set('rowspan', type('')(s.row_span)) td.tail = '\n\t\t\t' tr.append(td) - for x in XPath('./w:p|./w:tbl')(tc): + for x in self.namespace.XPath('./w:p|./w:tbl')(tc): if x.tag.endswith('}p'): td.append(rmap[x]) else: @@ -627,15 +632,16 @@ class Table(object): class Tables(object): - def __init__(self): + def __init__(self, namespace): self.tables = [] self.para_map = {} self.sub_tables = set() + self.namespace = namespace def register(self, tbl, styles): if tbl in self.sub_tables: return - self.tables.append(Table(tbl, styles, self.para_map)) + self.tables.append(Table(self.namespace, tbl, styles, self.para_map)) self.sub_tables |= set(self.tables[-1].sub_tables) def apply_markup(self, object_map, page_map): diff --git a/src/calibre/ebooks/docx/theme.py 
b/src/calibre/ebooks/docx/theme.py index 708662168c..91526558c9 100644 --- a/src/calibre/ebooks/docx/theme.py +++ b/src/calibre/ebooks/docx/theme.py @@ -6,22 +6,21 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal ' -from calibre.ebooks.docx.names import XPath - class Theme(object): - def __init__(self): + def __init__(self, namespace): self.major_latin_font = 'Cambria' self.minor_latin_font = 'Calibri' + self.namespace = namespace def __call__(self, root): - for fs in XPath('//a:fontScheme')(root): - for mj in XPath('./a:majorFont')(fs): - for l in XPath('./a:latin[@typeface]')(mj): + for fs in self.namespace.XPath('//a:fontScheme')(root): + for mj in self.namespace.XPath('./a:majorFont')(fs): + for l in self.namespace.XPath('./a:latin[@typeface]')(mj): self.major_latin_font = l.get('typeface') - for mj in XPath('./a:minorFont')(fs): - for l in XPath('./a:latin[@typeface]')(mj): + for mj in self.namespace.XPath('./a:minorFont')(fs): + for l in self.namespace.XPath('./a:latin[@typeface]')(mj): self.minor_latin_font = l.get('typeface') def resolve_font_family(self, ff): diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index 08746ae698..4bbfe14426 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -15,9 +15,7 @@ from lxml.html.builder import ( from calibre import guess_type from calibre.ebooks.docx.container import DOCX, fromstring -from calibre.ebooks.docx.names import ( - XPath, is_tag, XML, STYLES, NUMBERING, FONTS, get, generate_anchor, - descendants, FOOTNOTES, ENDNOTES, children, THEMES, SETTINGS) +from calibre.ebooks.docx.names import XML, generate_anchor from calibre.ebooks.docx.styles import Styles, inherit, PageProperties from calibre.ebooks.docx.numbering import Numbering from calibre.ebooks.docx.fonts import Fonts @@ -54,6 +52,7 @@ class Convert(object): def __init__(self, path_or_stream, 
dest_dir=None, log=None, detect_cover=True, notes_text=None): self.docx = DOCX(path_or_stream, log=log) + self.namespace = self.docx.namespace self.ms_pat = re.compile(r'\s{2,}') self.ws_pat = re.compile(r'[\n\r\t]') self.log = self.docx.log @@ -62,12 +61,12 @@ class Convert(object): self.dest_dir = dest_dir or os.getcwdu() self.mi = self.docx.metadata self.body = BODY() - self.theme = Theme() - self.settings = Settings() - self.tables = Tables() - self.fields = Fields() - self.styles = Styles(self.tables) - self.images = Images(self.log) + self.theme = Theme(self.namespace) + self.settings = Settings(self.namespace) + self.tables = Tables(self.namespace) + self.fields = Fields(self.namespace) + self.styles = Styles(self.namespace, self.tables) + self.images = Images(self.namespace, self.log) self.object_map = OrderedDict() self.html = HTML( HEAD( @@ -211,7 +210,7 @@ class Convert(object): html_obj.set('class', cls) if notes_header is not None: - for h in children(self.body, 'h1', 'h2', 'h3'): + for h in self.namespace.children(self.body, 'h1', 'h2', 'h3'): notes_header.tag = h.tag cls = h.get('class', None) if cls and cls != 'notes-header': @@ -221,7 +220,7 @@ class Convert(object): self.fields.polish_markup(self.object_map) self.log.debug('Cleaning up redundant markup generated by Word') - self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover) + self.cover_image = cleanup_markup(self.log, self.html, self.styles, self.dest_dir, self.detect_cover, self.namespace.XPath) return self.write(doc) @@ -230,14 +229,14 @@ class Convert(object): self.page_map = OrderedDict() self.section_starts = [] - for p in descendants(doc, 'w:p', 'w:tbl'): + for p in self.namespace.descendants(doc, 'w:p', 'w:tbl'): if p.tag.endswith('}tbl'): self.tables.register(p, self.styles) current.append(p) continue - sect = tuple(descendants(p, 'w:sectPr')) + sect = tuple(self.namespace.descendants(p, 'w:sectPr')) if sect: - pr = PageProperties(sect) + 
pr = PageProperties(self.namespace, sect) paras = current + [p] for x in paras: self.page_map[x] = pr @@ -248,8 +247,8 @@ class Convert(object): if current: self.section_starts.append(current[0]) - last = XPath('./w:body/w:sectPr')(doc) - pr = PageProperties(last) + last = self.namespace.XPath('./w:body/w:sectPr')(doc) + pr = PageProperties(self.namespace, last) for x in current: self.page_map[x] = pr @@ -264,16 +263,16 @@ class Convert(object): name = name return name - nname = get_name(NUMBERING, 'numbering.xml') - sname = get_name(STYLES, 'styles.xml') - sename = get_name(SETTINGS, 'settings.xml') - fname = get_name(FONTS, 'fontTable.xml') - tname = get_name(THEMES, 'theme1.xml') - foname = get_name(FOOTNOTES, 'footnotes.xml') - enname = get_name(ENDNOTES, 'endnotes.xml') - numbering = self.numbering = Numbering() - footnotes = self.footnotes = Footnotes() - fonts = self.fonts = Fonts() + nname = get_name(self.namespace.names['NUMBERING'], 'numbering.xml') + sname = get_name(self.namespace.names['STYLES'], 'styles.xml') + sename = get_name(self.namespace.names['SETTINGS'], 'settings.xml') + fname = get_name(self.namespace.names['FONTS'], 'fontTable.xml') + tname = get_name(self.namespace.names['THEMES'], 'theme1.xml') + foname = get_name(self.namespace.names['FOOTNOTES'], 'footnotes.xml') + enname = get_name(self.namespace.names['ENDNOTES'], 'endnotes.xml') + numbering = self.numbering = Numbering(self.namespace) + footnotes = self.footnotes = Footnotes(self.namespace) + fonts = self.fonts = Fonts(self.namespace) foraw = enraw = None forel, enrel = ({}, {}), ({}, {}) @@ -337,7 +336,7 @@ class Convert(object): self.styles.resolve_numbering(numbering) def write(self, doc): - toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map, self.log) + toc = create_toc(doc, self.body, self.resolved_link_map, self.styles, self.object_map, self.log, self.namespace) raw = html.tostring(self.html, encoding='utf-8', doctype='') with 
open(os.path.join(self.dest_dir, 'index.html'), 'wb') as f: f.write(raw) @@ -363,11 +362,11 @@ class Convert(object): return os.path.join(self.dest_dir, 'metadata.opf') def read_block_anchors(self, doc): - doc_anchors = frozenset(XPath('./w:body/w:bookmarkStart[@w:name]')(doc)) + doc_anchors = frozenset(self.namespace.XPath('./w:body/w:bookmarkStart[@w:name]')(doc)) if doc_anchors: current_bm = set() rmap = {v:k for k, v in self.object_map.iteritems()} - for p in descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'): + for p in self.namespace.descendants(doc, 'w:p', 'w:bookmarkStart[@w:name]'): if p.tag.endswith('}p'): if current_bm and p in rmap: para = rmap[p] @@ -377,7 +376,7 @@ class Convert(object): self.anchor_map[name] = para.get('id') current_bm = set() elif p in doc_anchors: - anchor = get(p, 'w:name') + anchor = self.namespace.get(p, 'w:name') if anchor: current_bm.add(anchor) @@ -390,7 +389,7 @@ class Convert(object): current_anchor = None current_hyperlink = None - hl_xpath = XPath('ancestor::w:hyperlink[1]') + hl_xpath = self.namespace.XPath('ancestor::w:hyperlink[1]') def p_parent(x): # Ensure that nested tags are handled. 
These can occur if a @@ -403,7 +402,7 @@ class Convert(object): except AttributeError: break - for x in descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'): + for x in self.namespace.descendants(p, 'w:r', 'w:bookmarkStart', 'w:hyperlink'): if p_parent(x) is not p: continue if x.tag.endswith('}r'): @@ -422,7 +421,7 @@ class Convert(object): dest.append(span) self.layers[p].append(x) elif x.tag.endswith('}bookmarkStart'): - anchor = get(x, 'w:name') + anchor = self.namespace.get(x, 'w:name') if anchor and anchor not in self.anchor_map: old_anchor = current_anchor self.anchor_map[anchor] = current_anchor = generate_anchor(anchor, frozenset(self.anchor_map.itervalues())) @@ -502,17 +501,17 @@ class Convert(object): span = self.wrap_elems(spans, SPAN()) span.tag = 'a' self.resolved_link_map[hyperlink] = span - tgt = get(hyperlink, 'w:tgtFrame') + tgt = self.namespace.get(hyperlink, 'w:tgtFrame') if tgt: span.set('target', tgt) - tt = get(hyperlink, 'w:tooltip') + tt = self.namespace.get(hyperlink, 'w:tooltip') if tt: span.set('title', tt) - rid = get(hyperlink, 'r:id') + rid = self.namespace.get(hyperlink, 'r:id') if rid and rid in relationships_by_id: span.set('href', relationships_by_id[rid]) continue - anchor = get(hyperlink, 'w:anchor') + anchor = self.namespace.get(hyperlink, 'w:anchor') if anchor and anchor in self.anchor_map: span.set('href', '#' + self.anchor_map[anchor]) continue @@ -576,7 +575,7 @@ class Convert(object): text = Text(ans, 'text', []) for child in run: - if is_tag(child, 'w:t'): + if self.namespace.is_tag(child, 'w:t'): if not child.text: continue space = child.get(XML('space'), None) @@ -596,11 +595,11 @@ class Convert(object): ans.append(text.elem) else: text.buf.append(ctext) - elif is_tag(child, 'w:cr'): + elif self.namespace.is_tag(child, 'w:cr'): text.add_elem(BR()) ans.append(text.elem) - elif is_tag(child, 'w:br'): - typ = get(child, 'w:type') + elif self.namespace.is_tag(child, 'w:br'): + typ = self.namespace.get(child, 'w:type') if 
typ in {'column', 'page'}: br = BR(style='page-break-after:always') else: @@ -611,25 +610,25 @@ class Convert(object): br = BR() text.add_elem(br) ans.append(text.elem) - elif is_tag(child, 'w:drawing') or is_tag(child, 'w:pict'): + elif self.namespace.is_tag(child, 'w:drawing') or self.namespace.is_tag(child, 'w:pict'): for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir): text.add_elem(img) ans.append(text.elem) - elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'): + elif self.namespace.is_tag(child, 'w:footnoteReference') or self.namespace.is_tag(child, 'w:endnoteReference'): anchor, name = self.footnotes.get_ref(child) if anchor and name: l = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor) l.set('class', 'noteref') text.add_elem(l) ans.append(text.elem) - elif is_tag(child, 'w:tab'): + elif self.namespace.is_tag(child, 'w:tab'): spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6)) text.add_elem(SPAN(NBSP * spaces)) ans.append(text.elem) ans[-1].set('class', 'tab') - elif is_tag(child, 'w:noBreakHyphen'): + elif self.namespace.is_tag(child, 'w:noBreakHyphen'): text.buf.append(u'\u2011') - elif is_tag(child, 'w:softHyphen'): + elif self.namespace.is_tag(child, 'w:softHyphen'): text.buf.append(u'\u00ad') if text.buf: setattr(text.elem, text.attr, ''.join(text.buf)) diff --git a/src/calibre/ebooks/docx/toc.py b/src/calibre/ebooks/docx/toc.py index 65c66ca6db..f099c816e3 100644 --- a/src/calibre/ebooks/docx/toc.py +++ b/src/calibre/ebooks/docx/toc.py @@ -10,7 +10,6 @@ from collections import namedtuple from lxml.etree import tostring -from calibre.ebooks.docx.names import XPath, descendants, get, ancestor from calibre.ebooks.metadata.toc import TOC from calibre.ebooks.oeb.polish.toc import elem_to_toc_text @@ -21,8 +20,9 @@ class Count(object): def __init__(self): self.val = 0 -def from_headings(body, log): +def from_headings(body, log, namespace): ' Create a TOC from 
headings in the document ' + XPath, descendants = namespace.XPath, namespace.descendants headings = ('h1', 'h2', 'h3') tocroot = TOC() xpaths = [XPath('//%s' % x) for x in headings] @@ -99,7 +99,8 @@ def link_to_txt(a, styles, object_map): return tostring(a, method='text', with_tail=False, encoding=unicode).strip() -def from_toc(docx, link_map, styles, object_map, log): +def from_toc(docx, link_map, styles, object_map, log, namespace): + XPath, get, ancestor = namespace.XPath, namespace.get, namespace.ancestor toc_level = None level = 0 TI = namedtuple('TI', 'text anchor indent') @@ -136,7 +137,5 @@ def from_toc(docx, link_map, styles, object_map, log): log('Found Word Table of Contents, using it to generate the Table of Contents') return structure_toc(toc) -def create_toc(docx, body, link_map, styles, object_map, log): - return from_toc(docx, link_map, styles, object_map, log) or from_headings(body, log) - - +def create_toc(docx, body, link_map, styles, object_map, log, namespace): + return from_toc(docx, link_map, styles, object_map, log, namespace) or from_headings(body, log, namespace) diff --git a/src/calibre/ebooks/docx/writer/container.py b/src/calibre/ebooks/docx/writer/container.py index 30f6dbb6ab..9d3eb29d08 100644 --- a/src/calibre/ebooks/docx/writer/container.py +++ b/src/calibre/ebooks/docx/writer/container.py @@ -13,7 +13,7 @@ from lxml.builder import ElementMaker from calibre import guess_type from calibre.constants import numeric_version, __appname__ -from calibre.ebooks.docx.names import namespaces, STYLES, WEB_SETTINGS, IMAGES, FONTS +from calibre.ebooks.docx.names import DOCXNamespace from calibre.ebooks.metadata import authors_to_string from calibre.utils.date import utcnow from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1 @@ -27,7 +27,8 @@ def xml2str(root, pretty_print=False, with_tail=False): pretty_print=pretty_print, with_tail=with_tail) return ans -def create_skeleton(opts): +def create_skeleton(opts, 
namespaces=None): + namespaces = namespaces or DOCXNamespace().namespaces def w(x): return '{%s}%s' % (namespaces['w'], x) dn = {k:v for k, v in namespaces.iteritems() if k in {'w', 'r', 'm', 've', 'o', 'wp', 'w10', 'wne', 'a', 'pic'}} @@ -70,9 +71,9 @@ def create_skeleton(opts): return doc, styles, body -def update_doc_props(root, mi): +def update_doc_props(root, mi, namespace): def setm(name, text=None, ns='dc'): - ans = root.makeelement('{%s}%s' % (namespaces[ns], name)) + ans = root.makeelement('{%s}%s' % (namespace.namespaces[ns], name)) for child in tuple(root): if child.tag == ans.tag: root.remove(child) @@ -92,12 +93,13 @@ def update_doc_props(root, mi): class DocumentRelationships(object): - def __init__(self): + def __init__(self, namespace): self.rmap = {} + self.namespace = namespace for typ, target in { - STYLES: 'styles.xml', - WEB_SETTINGS: 'webSettings.xml', - FONTS: 'fontTable.xml', + namespace.names['STYLES']: 'styles.xml', + namespace.names['WEB_SETTINGS']: 'webSettings.xml', + namespace.names['FONTS']: 'fontTable.xml', }.iteritems(): self.add_relationship(target, typ) @@ -112,9 +114,10 @@ class DocumentRelationships(object): return ans def add_image(self, target): - return self.add_relationship(target, IMAGES) + return self.add_relationship(target, self.namespace.names['IMAGES']) def serialize(self): + namespaces = self.namespace.namespaces E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']}) relationships = E.Relationships() for (target, rtype, target_mode), rid in self.rmap.iteritems(): @@ -127,8 +130,10 @@ class DocumentRelationships(object): class DOCX(object): def __init__(self, opts, log): + self.namespace = DOCXNamespace() + namespaces = self.namespace.namespaces self.opts, self.log = opts, log - self.document_relationships = DocumentRelationships() + self.document_relationships = DocumentRelationships(self.namespace) self.font_table = etree.Element('{%s}fonts' % namespaces['w'], nsmap={k:namespaces[k] for k in 
'wr'}) E = ElementMaker(namespace=namespaces['pr'], nsmap={None:namespaces['pr']}) self.embedded_fonts = E.Relationships() @@ -138,7 +143,7 @@ class DOCX(object): # Boilerplate {{{ @property def contenttypes(self): - E = ElementMaker(namespace=namespaces['ct'], nsmap={None:namespaces['ct']}) + E = ElementMaker(namespace=self.namespace.namespaces['ct'], nsmap={None:self.namespace.namespaces['ct']}) types = E.Types() for partname, mt in { "/word/footnotes.xml": "application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml", @@ -174,7 +179,7 @@ class DOCX(object): @property def appproperties(self): - E = ElementMaker(namespace=namespaces['ep'], nsmap={None:namespaces['ep']}) + E = ElementMaker(namespace=self.namespace.namespaces['ep'], nsmap={None:self.namespace.namespaces['ep']}) props = E.Properties( E.Application(__appname__), E.AppVersion('%02d.%04d' % numeric_version[:2]), @@ -193,14 +198,14 @@ class DOCX(object): return textwrap.dedent(b'''\ - - - - ''') + + + + '''.format(**self.namespace.names)) @property def websettings(self): - E = ElementMaker(namespace=namespaces['w'], nsmap={'w':namespaces['w']}) + E = ElementMaker(namespace=self.namespace.namespaces['w'], nsmap={'w':self.namespace.namespaces['w']}) ws = E.webSettings( E.optimizeForBrowser, E.allowPNG, E.doNotSaveAsSingleFile) return xml2str(ws) @@ -208,6 +213,7 @@ class DOCX(object): # }}} def convert_metadata(self, mi): + namespaces = self.namespace.namespaces E = ElementMaker(namespace=namespaces['cp'], nsmap={x:namespaces[x] for x in 'cp dc dcterms xsi'.split()}) cp = E.coreProperties(E.revision("1"), E.lastModifiedBy('calibre')) ts = utcnow().isoformat(str('T')).rpartition('.')[0] + 'Z' @@ -216,7 +222,7 @@ class DOCX(object): x.text = ts cp.append(x) self.mi = mi - update_doc_props(cp, self.mi) + update_doc_props(cp, self.mi, self.namespace) return xml2str(cp) def create_empty_document(self, mi): diff --git a/src/calibre/ebooks/docx/writer/fonts.py 
b/src/calibre/ebooks/docx/writer/fonts.py index 64586e82f3..93fd880c2c 100644 --- a/src/calibre/ebooks/docx/writer/fonts.py +++ b/src/calibre/ebooks/docx/writer/fonts.py @@ -9,7 +9,6 @@ __copyright__ = '2015, Kovid Goyal ' from collections import defaultdict from uuid import uuid4 -from calibre.ebooks.docx.names import makeelement, EMBEDDED_FONT from calibre.ebooks.oeb.base import OEB_STYLES from calibre.ebooks.oeb.transforms.subset import find_font_face_rules @@ -21,10 +20,12 @@ def obfuscate_font_data(data, key): class FontsManager(object): - def __init__(self, oeb, opts): + def __init__(self, namespace, oeb, opts): + self.namespace = namespace self.oeb, self.log, self.opts = oeb, oeb.log, opts def serialize(self, text_styles, fonts, embed_relationships, font_data_map): + makeelement = self.namespace.makeelement font_families, seen = set(), set() for ts in text_styles: if ts.font_family: @@ -68,7 +69,7 @@ class FontsManager(object): if rid is None: rel_map[item] = rid = 'rId%d' % num fname = 'fonts/font%d.odttf' % num - makeelement(embed_relationships, 'Relationship', Id=rid, Type=EMBEDDED_FONT, Target=fname) + makeelement(embed_relationships, 'Relationship', Id=rid, Type=self.namespace.names['EMBEDDED_FONT'], Target=fname) font_data_map['word/' + fname] = obfuscate_font_data(item.data, key) makeelement(font, 'w:embed' + tag, r_id=rid, w_fontKey='{%s}' % key.urn.rpartition(':')[-1].upper(), diff --git a/src/calibre/ebooks/docx/writer/from_html.py b/src/calibre/ebooks/docx/writer/from_html.py index fd9e56bf61..6929e445bf 100644 --- a/src/calibre/ebooks/docx/writer/from_html.py +++ b/src/calibre/ebooks/docx/writer/from_html.py @@ -9,7 +9,7 @@ __copyright__ = '2013, Kovid Goyal ' import re from calibre.ebooks.docx.writer.container import create_skeleton -from calibre.ebooks.docx.writer.styles import w, StylesManager +from calibre.ebooks.docx.writer.styles import StylesManager from calibre.ebooks.docx.writer.images import ImagesManager from 
calibre.ebooks.docx.writer.fonts import FontsManager from calibre.ebooks.docx.writer.tables import Table @@ -45,12 +45,13 @@ class TextRun(object): ws_pat = None - def __init__(self, style, first_html_parent): + def __init__(self, namespace, style, first_html_parent): self.first_html_parent = first_html_parent if self.ws_pat is None: TextRun.ws_pat = self.ws_pat = re.compile(r'\s+') self.style = style self.texts = [] + self.makelement = namespace.makeelement def add_text(self, text, preserve_whitespace): if not preserve_whitespace: @@ -68,19 +69,18 @@ class TextRun(object): self.texts.append((drawing, None)) def serialize(self, p): - r = p.makeelement(w('r')) - p.append(r) - rpr = r.makeelement(w('rPr')) - rpr.append(rpr.makeelement(w('rStyle'), **{w('val'):self.style.id})) - r.append(rpr) + makeelement = self.makelement + r = makeelement(p, 'w:r') + rpr = makeelement(r, 'w:rPr') + makeelement(rpr, 'w:rStyle', w_val=self.style.id) + for text, preserve_whitespace in self.texts: if text is None: - r.append(r.makeelement(w('br'), **{w('clear'):preserve_whitespace})) + makeelement(r, 'w:br', w_clear=preserve_whitespace) elif hasattr(text, 'xpath'): r.append(text) else: - t = r.makeelement(w('t')) - r.append(t) + t = makeelement(r, 'w:t') t.text = text or '' if preserve_whitespace: t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve') @@ -94,7 +94,8 @@ class TextRun(object): class Block(object): - def __init__(self, styles_manager, html_block, style, is_table_cell=False): + def __init__(self, namespace, styles_manager, html_block, style, is_table_cell=False): + self.namespace = namespace self.html_block = html_block self.html_style = style self.style = styles_manager.create_block_style(style, html_block, is_table_cell=is_table_cell) @@ -109,7 +110,7 @@ class Block(object): if self.runs and ts == self.runs[-1].style: run = self.runs[-1] else: - run = TextRun(ts, self.html_block if html_parent is None else html_parent) + run = TextRun(self.namespace, ts, 
self.html_block if html_parent is None else html_parent) self.runs.append(run) preserve_whitespace = ws in {'pre', 'pre-wrap'} if ignore_leading_whitespace and not preserve_whitespace: @@ -125,7 +126,7 @@ class Block(object): if self.runs: run = self.runs[-1] else: - run = TextRun(self.styles_manager.create_text_style(self.html_style), self.html_block) + run = TextRun(self.namespace, self.styles_manager.create_text_style(self.html_style), self.html_block) self.runs.append(run) run.add_break(clear=clear) @@ -133,20 +134,19 @@ class Block(object): if self.runs: run = self.runs[-1] else: - run = TextRun(self.styles_manager.create_text_style(self.html_style), self.html_block) + run = TextRun(self.namespace, self.styles_manager.create_text_style(self.html_style), self.html_block) self.runs.append(run) run.add_image(drawing) def serialize(self, body): - p = body.makeelement(w('p')) - body.append(p) - ppr = p.makeelement(w('pPr')) - p.append(ppr) + makeelement = self.namespace.makeelement + p = makeelement(body, 'w:p') + ppr = makeelement(p, 'w:pPr') if self.keep_next: - ppr.append(ppr.makeelement(w('keepNext'))) + makeelement(ppr, 'w:keepNext') if self.page_break_before: - ppr.append(ppr.makeelement(w('pageBreakBefore'))) - ppr.append(ppr.makeelement(w('pStyle'), **{w('val'):self.style.id})) + makeelement(ppr, 'w:pageBreakBefore') + makeelement(ppr, 'w:pStyle', w_val=self.style.id) for run in self.runs: run.serialize(p) @@ -158,7 +158,8 @@ class Block(object): class Blocks(object): - def __init__(self, styles_manager): + def __init__(self, namespace, styles_manager): + self.namespace = namespace self.styles_manager = styles_manager self.all_blocks = [] self.pos = 0 @@ -183,12 +184,12 @@ class Blocks(object): def start_new_block(self, html_block, style, is_table_cell=False): self.end_current_block() - self.current_block = Block(self.styles_manager, html_block, style, is_table_cell=is_table_cell) + self.current_block = Block(self.namespace, self.styles_manager, html_block, 
style, is_table_cell=is_table_cell) self.open_html_blocks.add(html_block) return self.current_block def start_new_table(self, html_tag, tag_style=None): - self.current_table = Table(html_tag, tag_style) + self.current_table = Table(self.namespace, html_tag, tag_style) self.tables.append(self.current_table) def start_new_row(self, html_tag, tag_style): @@ -252,10 +253,10 @@ class Convert(object): self.svg_rasterizer = SVGRasterizer() self.svg_rasterizer(self.oeb, self.opts) - self.styles_manager = StylesManager() + self.styles_manager = StylesManager(self.docx.namespace) self.images_manager = ImagesManager(self.oeb, self.docx.document_relationships) - self.fonts_manager = FontsManager(self.oeb, self.opts) - self.blocks = Blocks(self.styles_manager) + self.fonts_manager = FontsManager(self.docx.namespace, self.oeb, self.opts) + self.blocks = Blocks(self.docx.namespace, self.styles_manager) for item in self.oeb.spine: self.process_item(item) diff --git a/src/calibre/ebooks/docx/writer/images.py b/src/calibre/ebooks/docx/writer/images.py index 9b8e1a7d5a..4b87e0e67c 100644 --- a/src/calibre/ebooks/docx/writer/images.py +++ b/src/calibre/ebooks/docx/writer/images.py @@ -15,7 +15,6 @@ from future_builtins import map from lxml import etree from calibre.ebooks.oeb.base import urlunquote -from calibre.ebooks.docx.names import makeelement, namespaces from calibre.ebooks.docx.images import pt_to_emu from calibre.utils.filenames import ascii_filename from calibre.utils.magick.draw import identify_data @@ -68,6 +67,8 @@ class ImagesManager(object): name = urlunquote(posixpath.basename(href)) width, height = map(pt_to_emu, style.img_size(img.width, img.height)) + makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces + root = etree.Element('root', nsmap=namespaces) ans = makeelement(root, 'w:drawing', append=False) if floating is None: diff --git a/src/calibre/ebooks/docx/writer/styles.py 
b/src/calibre/ebooks/docx/writer/styles.py index ffa21da27c..cc98ca5861 100644 --- a/src/calibre/ebooks/docx/writer/styles.py +++ b/src/calibre/ebooks/docx/writer/styles.py @@ -12,7 +12,6 @@ from operator import attrgetter from lxml import etree from calibre.ebooks import parse_css_length -from calibre.ebooks.docx.names import namespaces from calibre.ebooks.docx.writer.utils import convert_color, int_or_zero from calibre.utils.icu import numeric_sort_key from tinycss.css21 import CSS21Parser @@ -38,12 +37,6 @@ def css_font_family_to_docx(raw): for ff in parse_css_font_family(raw): return generic.get(ff.lower(), ff) -def w(x): - return '{%s}%s' % (namespaces['w'], x) - -def makeelement(parent, name, **attrs): - return parent.makeelement(w(name), **{w(k):v for k, v in attrs.iteritems()}) - def bmap(x): return 'on' if x else 'off' @@ -52,12 +45,17 @@ class DOCXStyle(object): ALL_PROPS = () TYPE = 'paragraph' - def __init__(self): + def __init__(self, namespace): + self.namespace = namespace + self.w = lambda x: '{%s}%s' % (namespace.namespaces['w'], x) self._hash = hash(tuple( getattr(self, x) for x in self.ALL_PROPS)) self.id = self.name = None self.next_style = None + def makeelement(self, parent, name, **attrs): + return parent.makeelement(self.w(name), **{self.w(k):v for k, v in attrs.iteritems()}) + def __hash__(self): return self._hash @@ -71,10 +69,11 @@ class DOCXStyle(object): return not self == other def __repr__(self): - return etree.tostring(self.serialize(etree.Element(self.__class__.__name__, nsmap={'w':namespaces['w']})), pretty_print=True) + return etree.tostring(self.serialize(etree.Element(self.__class__.__name__, nsmap={'w':self.namespace.namespaces['w']})), pretty_print=True) __str__ = __repr__ def serialize(self, styles, normal_style): + w, makeelement = self.w, self.makeelement style = makeelement(styles, 'style', styleId=self.id, type=self.TYPE) style.append(makeelement(style, 'name', val=self.name)) if self is normal_style: @@ -106,7 +105,7 @@ 
class TextStyle(DOCXStyle): 'border_style', 'border_width', 'border_color') TYPE = 'character' - def __init__(self, css, is_parent_style=False): + def __init__(self, namespace, css, is_parent_style=False): self.font_family = css_font_family_to_docx(css['font-family']) try: self.font_size = max(0, int(float(css['font-size']) * 2)) # stylizer normalizes all font sizes into pts @@ -163,9 +162,10 @@ class TextStyle(DOCXStyle): elif self.border_style != style: self.border_style = ignore - DOCXStyle.__init__(self) + DOCXStyle.__init__(self, namespace) def serialize_borders(self, bdr, normal_style): + w = self.w if (self.padding not in (None, ignore, 0) and self is normal_style) or self.padding != normal_style.padding: bdr.set(w('space'), str(0 if self.padding in (None, ignore) else self.padding)) if (self.border_width not in (None, ignore, 0) and self is normal_style) or self.border_width != normal_style.border_width: @@ -177,6 +177,7 @@ class TextStyle(DOCXStyle): return bdr def serialize(self, styles, normal_style): + makeelement = self.makeelement style_root = DOCXStyle.serialize(self, styles, normal_style) style = makeelement(style_root, 'rPr') @@ -273,7 +274,7 @@ class BlockStyle(DOCXStyle): [x%edge for edge in border_edges for x in border_props] ) - def __init__(self, css, html_block, is_table_cell=False): + def __init__(self, namespace, css, html_block, is_table_cell=False): read_css_block_borders(self, css) if is_table_cell: for edge in border_edges: @@ -298,9 +299,10 @@ class BlockStyle(DOCXStyle): self.text_align = {'start':'left', 'left':'left', 'end':'right', 'right':'right', 'center':'center', 'justify':'both', 'centre':'center'}.get( css['text-align'].lower(), 'left') - DOCXStyle.__init__(self) + DOCXStyle.__init__(self, namespace) def serialize_borders(self, bdr, normal_style): + w = self.w for edge in border_edges: e = bdr.makeelement(w(edge)) padding = getattr(self, 'padding_' + edge) @@ -319,6 +321,7 @@ class BlockStyle(DOCXStyle): return bdr def 
serialize(self, styles, normal_style): + w, makeelement = self.w, self.makeelement style_root = DOCXStyle.serialize(self, styles, normal_style) style = makeelement(style_root, 'pPr') @@ -393,11 +396,12 @@ class BlockStyle(DOCXStyle): class StylesManager(object): - def __init__(self): + def __init__(self, namespace): + self.namespace = namespace self.block_styles, self.text_styles = {}, {} def create_text_style(self, css_style, is_parent_style=False): - ans = TextStyle(css_style, is_parent_style=is_parent_style) + ans = TextStyle(self.namespace, css_style, is_parent_style=is_parent_style) existing = self.text_styles.get(ans, None) if existing is None: self.text_styles[ans] = ans @@ -406,7 +410,7 @@ class StylesManager(object): return ans def create_block_style(self, css_style, html_block, is_table_cell=False): - ans = BlockStyle(css_style, html_block, is_table_cell=is_table_cell) + ans = BlockStyle(self.namespace, css_style, html_block, is_table_cell=is_table_cell) existing = self.block_styles.get(ans, None) if existing is None: self.block_styles[ans] = ans diff --git a/src/calibre/ebooks/docx/writer/tables.py b/src/calibre/ebooks/docx/writer/tables.py index 12bcfe2e5f..8c45646b46 100644 --- a/src/calibre/ebooks/docx/writer/tables.py +++ b/src/calibre/ebooks/docx/writer/tables.py @@ -8,7 +8,6 @@ __copyright__ = '2015, Kovid Goyal ' from collections import namedtuple -from calibre.ebooks.docx.names import makeelement from calibre.ebooks.docx.writer.utils import convert_color from calibre.ebooks.docx.writer.styles import read_css_block_borders as rcbb, border_edges @@ -29,7 +28,7 @@ class SpannedCell(object): def resolve_borders(self): pass - def serialize(self, tr): + def serialize(self, tr, makeelement): tc = makeelement(tr, 'w:tc') tcPr = makeelement(tc, 'w:tcPr') makeelement(tcPr, 'w:%sMerge' % ('h' if self.horizontal else 'v'), w_val='continue') @@ -70,14 +69,6 @@ def convert_width(tag_style): pass return ('auto', 0) -def serialize_border_edge(self, bdr, edge): - 
width = getattr(self, 'border_%s_width' % edge) - bstyle = getattr(self, 'border_%s_style' % edge) - if width > 0 and bstyle != 'none': - makeelement(bdr, 'w:' + edge, w_val=bstyle, w_sz=str(width), w_color=getattr(self, 'border_%s_color' % edge)) - return True - return False - class Cell(object): BLEVEL = 2 @@ -107,7 +98,7 @@ class Cell(object): self.items.append(table) return table - def serialize(self, parent): + def serialize(self, parent, makeelement): tc = makeelement(parent, 'w:tc') tcPr = makeelement(tc, 'w:tcPr') makeelement(tcPr, 'w:tcW', w_type=self.width[0], w_w=str(self.width[1])) @@ -240,16 +231,17 @@ class Row(object): def add_table(self, table): return self.current_cell.add_table(table) - def serialize(self, parent): + def serialize(self, parent, makeelement): tr = makeelement(parent, 'w:tr') for cell in self.cells: - cell.serialize(tr) + cell.serialize(tr, makeelement) class Table(object): BLEVEL = 0 - def __init__(self, html_tag, tag_style=None): + def __init__(self, namespace, html_tag, tag_style=None): + self.namespace = namespace self.html_tag = html_tag self.rows = [] self.current_row = None @@ -329,6 +321,7 @@ class Table(object): return self.current_row.add_table(table) def serialize(self, parent): + makeelement = self.namespace.makeelement rows = [r for r in self.rows if r.cells] if not rows: return @@ -338,4 +331,4 @@ class Table(object): if self.jc is not None: makeelement(tblPr, 'w:jc', w_val=self.jc) for row in rows: - row.serialize(tbl) + row.serialize(tbl, makeelement) diff --git a/src/calibre/ebooks/metadata/docx.py b/src/calibre/ebooks/metadata/docx.py index 19a0249195..fa56f265d6 100644 --- a/src/calibre/ebooks/metadata/docx.py +++ b/src/calibre/ebooks/metadata/docx.py @@ -12,14 +12,14 @@ from io import BytesIO from lxml import etree from calibre.ebooks.docx.container import DOCX -from calibre.ebooks.docx.writer.container import update_doc_props, xml2str, namespaces -from calibre.ebooks.docx.names import XPath, get +from 
calibre.ebooks.docx.writer.container import update_doc_props, xml2str from calibre.utils.magick.draw import identify_data -images = XPath('//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]') - def get_cover(docx): doc = docx.document + get = docx.namespace.get + images = docx.namespace.XPath( + '//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]') rid_map = docx.document_relationships[0] for image in images(doc): rid = get(image, 'r:embed') or get(image, 'r:id') @@ -58,11 +58,11 @@ def set_metadata(stream, mi): except Exception: ap_raw = None cp = etree.fromstring(dp_raw) - update_doc_props(cp, mi) + update_doc_props(cp, mi, c.namespace) replacements = {} if ap_raw is not None: ap = etree.fromstring(ap_raw) - comp = ap.makeelement('{%s}Company' % namespaces['ep']) + comp = ap.makeelement('{%s}Company' % c.namespace.namespaces['ep']) for child in tuple(ap): if child.tag == comp.tag: ap.remove(child)