diff --git a/src/calibre/ebooks/conversion/plugins/docx_output.py b/src/calibre/ebooks/conversion/plugins/docx_output.py index 486416253c..c3d20ddafe 100644 --- a/src/calibre/ebooks/conversion/plugins/docx_output.py +++ b/src/calibre/ebooks/conversion/plugins/docx_output.py @@ -32,6 +32,9 @@ class DOCXOutput(OutputFormatPlugin): help=_('Do not insert the book cover as an image at the start of the document.' ' If you use this option, the book cover will be discarded.')), + OptionRecommendation(name='docx_no_toc', recommended_value=False, + help=_('Do not insert the table of contents as a page at the start of the document.')), + OptionRecommendation(name='extract_to', help=_('Extract the contents of the generated %s file to the ' 'specified directory. The contents of the directory are first ' @@ -59,7 +62,7 @@ class DOCXOutput(OutputFormatPlugin): from calibre.ebooks.docx.writer.from_html import Convert docx = DOCX(opts, log) self.convert_metadata(oeb) - Convert(oeb, docx, self.mi, not opts.docx_no_cover)() + Convert(oeb, docx, self.mi, not opts.docx_no_cover, not opts.docx_no_toc)() docx.write(output_path, self.mi) if opts.extract_to: from calibre.ebooks.docx.dump import do_dump diff --git a/src/calibre/ebooks/docx/writer/from_html.py b/src/calibre/ebooks/docx/writer/from_html.py index ff7d80267b..233040de8b 100644 --- a/src/calibre/ebooks/docx/writer/from_html.py +++ b/src/calibre/ebooks/docx/writer/from_html.py @@ -63,7 +63,7 @@ class TextRun(object): self.link = None self.lang = lang self.parent_style = None - self.makelement = namespace.makeelement + self.makeelement = namespace.makeelement def add_text(self, text, preserve_whitespace, bookmark=None, link=None): if not preserve_whitespace: @@ -82,7 +82,7 @@ class TextRun(object): self.texts.append((drawing, None, bookmark)) def serialize(self, p, links_manager): - makeelement = self.makelement + makeelement = self.makeelement parent = p if self.link is None else links_manager.serialize_hyperlink(p, self.link) r 
= makeelement(parent, 'w:r') rpr = makeelement(r, 'w:rPr', append=False) @@ -390,8 +390,8 @@ class Convert(object): a[href] { text-decoration: underline; color: blue } ''' - def __init__(self, oeb, docx, mi, add_cover): - self.oeb, self.docx, self.add_cover = oeb, docx, add_cover + def __init__(self, oeb, docx, mi, add_cover, add_toc): + self.oeb, self.docx, self.add_cover, self.add_toc = oeb, docx, add_cover, add_toc self.log, self.opts = docx.log, docx.opts self.mi = mi self.cover_img = None @@ -411,6 +411,8 @@ class Convert(object): for item in self.oeb.spine: self.process_item(item) + if self.add_toc: + self.links_manager.process_toc_links(self.oeb) if self.add_cover and self.oeb.metadata.cover and unicode(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids: cover_id = unicode(self.oeb.metadata.cover[0]) @@ -557,6 +559,8 @@ class Convert(object): self.docx.document, self.docx.styles, body = create_skeleton(self.opts) self.blocks.serialize(body) body.append(body[0]) # Move to the end + if self.links_manager.toc: + self.links_manager.serialize_toc(body, self.styles_manager.primary_heading_style) if self.cover_img is not None: self.images_manager.write_cover_block(body, self.cover_img) self.styles_manager.serialize(self.docx.styles) diff --git a/src/calibre/ebooks/docx/writer/links.py b/src/calibre/ebooks/docx/writer/links.py index e379782801..d41c3edeb5 100644 --- a/src/calibre/ebooks/docx/writer/links.py +++ b/src/calibre/ebooks/docx/writer/links.py @@ -6,10 +6,11 @@ from __future__ import (unicode_literals, division, absolute_import, __license__ = 'GPL v3' __copyright__ = '2015, Kovid Goyal ' -import posixpath +import posixpath, re from uuid import uuid4 from urlparse import urlparse +from calibre.utils.filenames import ascii_text def start_text(tag, prefix_len=0, top_level=True): ans = tag.text or '' @@ -23,17 +24,53 @@ def start_text(tag, prefix_len=0, top_level=True): ans = ans[:limit] + '...' 
return ans +class TOCItem(object): + + def __init__(self, title, bmark, level): + self.title, self.bmark, self.level = title, bmark, level + self.is_first = self.is_last = False + + def serialize(self, body, makeelement): + p = makeelement(body, 'w:p', append=False) + ppr = makeelement(p, 'w:pPr') + makeelement(ppr, 'w:pStyle', w_val="Normal") + makeelement(ppr, 'w:ind', w_left='0', w_firstLineChars='0', w_firstLine='0', w_leftChars=str(200 * self.level)) + if self.is_first: + makeelement(ppr, 'w:pageBreakBefore', w_val='off') + r = makeelement(p, 'w:r') + makeelement(r, 'w:fldChar', w_fldCharType='begin') + r = makeelement(p, 'w:r') + makeelement(r, 'w:instrText').text = ' TOC \h ' + r[0].set('{http://www.w3.org/XML/1998/namespace}space', 'preserve') + r = makeelement(p, 'w:r') + makeelement(r, 'w:fldChar', w_fldCharType='separate') + hl = makeelement(p, 'w:hyperlink', w_anchor=self.bmark) + r = makeelement(hl, 'w:r') + rpr = makeelement(r, 'w:rPr') + makeelement(rpr, 'w:color', w_val='0000FF', w_themeColor='hyperlink') + makeelement(rpr, 'w:u', w_val='single') + makeelement(r, 'w:t').text = self.title + if self.is_last: + r = makeelement(p, 'w:r') + makeelement(r, 'w:fldChar', w_fldCharType='end') + body.insert(0, p) + +def sanitize_bookmark_name(base): + return re.sub(r'[^0-9a-zA-Z]', '_', ascii_text(base)) + + class LinksManager(object): def __init__(self, namespace, document_relationships): self.namespace = namespace - self.docment_relationships = document_relationships + self.document_relationships = document_relationships self.top_anchor = type('')(uuid4().hex) self.anchor_map = {} self.used_bookmark_names = set() self.bmark_id = 0 self.document_hrefs = set() self.external_links = {} + self.toc = [] def bookmark_for_anchor(self, anchor, current_item, html_tag): key = (current_item.href, anchor) @@ -44,10 +81,11 @@ class LinksManager(object): self.document_hrefs.add(current_item.href) else: name = start_text(html_tag).strip() or anchor + name = 
sanitize_bookmark_name(name) i, bname = 0, name while name in self.used_bookmark_names: i += 1 - name = bname + (' %d' % i) + name = bname + ('_%d' % i) self.anchor_map[key] = name return name @@ -71,6 +109,47 @@ class LinksManager(object): return self.namespace.makeelement(parent, 'w:hyperlink', w_anchor=bmark, w_tooltip=tooltip or '') if purl.scheme in {'http', 'https', 'ftp'}: if url not in self.external_links: - self.external_links[url] = self.docment_relationships.add_relationship(url, self.namespace.names['LINKS'], target_mode='External') + self.external_links[url] = self.document_relationships.add_relationship(url, self.namespace.names['LINKS'], target_mode='External') return self.namespace.makeelement(parent, 'w:hyperlink', r_id=self.external_links[url], w_tooltip=tooltip or '') return parent + + def process_toc_node(self, toc, level=0): + href = toc.href + if href: + purl = urlparse(href) + href = purl.path + if href in self.document_hrefs: + key = (href, purl.fragment or self.top_anchor) + if key in self.anchor_map: + bmark = self.anchor_map[key] + else: + bmark = self.anchor_map[(href, self.top_anchor)] + self.toc.append(TOCItem(toc.title, bmark, level)) + for child in toc: + self.process_toc_node(child, level+1) + + def process_toc_links(self, oeb): + self.toc = [] + has_toc = oeb.toc and oeb.toc.count() > 1 + if not has_toc: + return + for child in oeb.toc: + self.process_toc_node(child) + if self.toc: + self.toc[0].is_first = True + self.toc[-1].is_last = True + + def serialize_toc(self, body, primary_heading_style): + pbb = body[0].xpath('//*[local-name()="pageBreakBefore"]')[0] + pbb.set('{%s}val' % self.namespace.namespaces['w'], 'on') + for block in reversed(self.toc): + block.serialize(body, self.namespace.makeelement) + title = __('Table of Contents') + makeelement = self.namespace.makeelement + p = makeelement(body, 'w:p', append=False) + ppr = makeelement(p, 'w:pPr') + if primary_heading_style is not None: + makeelement(ppr, 'w:pStyle', 
w_val=primary_heading_style.id) + makeelement(ppr, 'w:pageBreakBefore', w_val='off') + makeelement(makeelement(p, 'w:r'), 'w:t').text = title + body.insert(0, p) diff --git a/src/calibre/ebooks/docx/writer/styles.py b/src/calibre/ebooks/docx/writer/styles.py index 73575875e5..f2c854de20 100644 --- a/src/calibre/ebooks/docx/writer/styles.py +++ b/src/calibre/ebooks/docx/writer/styles.py @@ -593,6 +593,7 @@ class StylesManager(object): heading_style.outline_level = i snum = len(str(max(1, len(counts) - 1))) + heading_styles = [] for i, (style, count) in enumerate(counts.most_common()): if i == 0: self.normal_style = style @@ -602,6 +603,7 @@ class StylesManager(object): val = 'Para %0{}d'.format(snum) % i else: val = 'Heading %d' % (style.outline_level + 1) + heading_styles.append(style) style.id = style.name = val style.seq = i self.combined_styles = sorted(counts.iterkeys(), key=attrgetter('seq')) @@ -609,6 +611,17 @@ class StylesManager(object): self.log.debug('%d Text Styles %d Combined styles' % tuple(map(len, ( self.text_styles, self.combined_styles)))) + self.primary_heading_style = None + if heading_styles: + heading_styles.sort(key=attrgetter('outline_level')) + self.primary_heading_style = heading_styles[0] + else: + ms = 0 + for s in self.combined_styles: + if s.rs.font_size > ms: + self.primary_heading_style = s + ms = s.rs.font_size + def serialize(self, styles): lang = styles.xpath('descendant::*[local-name()="lang"]')[0] for k in tuple(lang.attrib): diff --git a/src/calibre/gui2/convert/docx_output.py b/src/calibre/gui2/convert/docx_output.py index 9329e9c40e..e57669f161 100644 --- a/src/calibre/gui2/convert/docx_output.py +++ b/src/calibre/gui2/convert/docx_output.py @@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form): def __init__(self, parent, get_option, get_help, db=None, book_id=None): Widget.__init__(self, parent, [ - 'docx_page_size', 'docx_custom_page_size', 'docx_no_cover', + 'docx_page_size', 'docx_custom_page_size', 'docx_no_cover', 
'docx_no_toc', ]) for x in get_option('docx_page_size').option.choices: self.opt_docx_page_size.addItem(x) diff --git a/src/calibre/gui2/convert/docx_output.ui b/src/calibre/gui2/convert/docx_output.ui index 68da6ff387..1f2059efa1 100644 --- a/src/calibre/gui2/convert/docx_output.ui +++ b/src/calibre/gui2/convert/docx_output.ui @@ -47,13 +47,20 @@ - + Do not insert &cover as image at start of document + + + + Do not insert the &Table of Contents as a page at the start of the document + + +