mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Output: Generate a ToC field in the output document based on the metadata table of contents
This commit is contained in:
parent
f3cd8990b6
commit
8cd9774ad5
@ -32,6 +32,9 @@ class DOCXOutput(OutputFormatPlugin):
|
||||
help=_('Do not insert the book cover as an image at the start of the document.'
|
||||
' If you use this option, the book cover will be discarded.')),
|
||||
|
||||
OptionRecommendation(name='docx_no_toc', recommended_value=False,
|
||||
help=_('Do not insert the table of contents as a page at the start of the document.')),
|
||||
|
||||
OptionRecommendation(name='extract_to',
|
||||
help=_('Extract the contents of the generated %s file to the '
|
||||
'specified directory. The contents of the directory are first '
|
||||
@ -59,7 +62,7 @@ class DOCXOutput(OutputFormatPlugin):
|
||||
from calibre.ebooks.docx.writer.from_html import Convert
|
||||
docx = DOCX(opts, log)
|
||||
self.convert_metadata(oeb)
|
||||
Convert(oeb, docx, self.mi, not opts.docx_no_cover)()
|
||||
Convert(oeb, docx, self.mi, not opts.docx_no_cover, not opts.docx_no_toc)()
|
||||
docx.write(output_path, self.mi)
|
||||
if opts.extract_to:
|
||||
from calibre.ebooks.docx.dump import do_dump
|
||||
|
@ -63,7 +63,7 @@ class TextRun(object):
|
||||
self.link = None
|
||||
self.lang = lang
|
||||
self.parent_style = None
|
||||
self.makelement = namespace.makeelement
|
||||
self.makeelement = namespace.makeelement
|
||||
|
||||
def add_text(self, text, preserve_whitespace, bookmark=None, link=None):
|
||||
if not preserve_whitespace:
|
||||
@ -82,7 +82,7 @@ class TextRun(object):
|
||||
self.texts.append((drawing, None, bookmark))
|
||||
|
||||
def serialize(self, p, links_manager):
|
||||
makeelement = self.makelement
|
||||
makeelement = self.makeelement
|
||||
parent = p if self.link is None else links_manager.serialize_hyperlink(p, self.link)
|
||||
r = makeelement(parent, 'w:r')
|
||||
rpr = makeelement(r, 'w:rPr', append=False)
|
||||
@ -390,8 +390,8 @@ class Convert(object):
|
||||
a[href] { text-decoration: underline; color: blue }
|
||||
'''
|
||||
|
||||
def __init__(self, oeb, docx, mi, add_cover):
|
||||
self.oeb, self.docx, self.add_cover = oeb, docx, add_cover
|
||||
def __init__(self, oeb, docx, mi, add_cover, add_toc):
|
||||
self.oeb, self.docx, self.add_cover, self.add_toc = oeb, docx, add_cover, add_toc
|
||||
self.log, self.opts = docx.log, docx.opts
|
||||
self.mi = mi
|
||||
self.cover_img = None
|
||||
@ -411,6 +411,8 @@ class Convert(object):
|
||||
|
||||
for item in self.oeb.spine:
|
||||
self.process_item(item)
|
||||
if self.add_toc:
|
||||
self.links_manager.process_toc_links(self.oeb)
|
||||
|
||||
if self.add_cover and self.oeb.metadata.cover and unicode(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids:
|
||||
cover_id = unicode(self.oeb.metadata.cover[0])
|
||||
@ -557,6 +559,8 @@ class Convert(object):
|
||||
self.docx.document, self.docx.styles, body = create_skeleton(self.opts)
|
||||
self.blocks.serialize(body)
|
||||
body.append(body[0]) # Move <sectPr> to the end
|
||||
if self.links_manager.toc:
|
||||
self.links_manager.serialize_toc(body, self.styles_manager.primary_heading_style)
|
||||
if self.cover_img is not None:
|
||||
self.images_manager.write_cover_block(body, self.cover_img)
|
||||
self.styles_manager.serialize(self.docx.styles)
|
||||
|
@ -6,10 +6,11 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2015, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import posixpath
|
||||
import posixpath, re
|
||||
from uuid import uuid4
|
||||
from urlparse import urlparse
|
||||
|
||||
from calibre.utils.filenames import ascii_text
|
||||
|
||||
def start_text(tag, prefix_len=0, top_level=True):
|
||||
ans = tag.text or ''
|
||||
@ -23,17 +24,53 @@ def start_text(tag, prefix_len=0, top_level=True):
|
||||
ans = ans[:limit] + '...'
|
||||
return ans
|
||||
|
||||
class TOCItem(object):
    """A single entry of the Table of Contents written into the DOCX body.

    Every entry becomes one ``w:p`` paragraph containing a hyperlink to the
    bookmark ``bmark``. The entry flagged ``is_first`` additionally carries
    the runs that open the ``TOC`` field, and the entry flagged ``is_last``
    carries the run that closes it, so that all entries together form the
    result of one field.
    """

    def __init__(self, title, bmark, level):
        """Store the entry text, its bookmark name and its nesting level."""
        self.title, self.bmark, self.level = title, bmark, level
        # Both flags start out False; LinksManager.process_toc_links() sets
        # them on the first and last collected entries.
        self.is_first = self.is_last = False

    def serialize(self, body, makeelement):
        """Build this entry's paragraph and insert it at the start of *body*.

        :param body: Element that receives the paragraph at index 0.
        :param makeelement: Factory called as
            ``makeelement(parent, tag, append=True, **attributes)`` that
            returns the newly created element.
        """
        # Created detached (append=False) because the paragraph is inserted
        # at the front of body at the end, not appended to it.
        p = makeelement(body, 'w:p', append=False)
        ppr = makeelement(p, 'w:pPr')
        makeelement(ppr, 'w:pStyle', w_val="Normal")
        # The left indent grows by 200 per nesting level
        makeelement(ppr, 'w:ind', w_left='0', w_firstLineChars='0', w_firstLine='0', w_leftChars=str(200 * self.level))
        if self.is_first:
            makeelement(ppr, 'w:pageBreakBefore', w_val='off')
            # Open the field: begin marker, instruction text, separator
            r = makeelement(p, 'w:r')
            makeelement(r, 'w:fldChar', w_fldCharType='begin')
            r = makeelement(p, 'w:r')
            # Raw string: the backslash is part of the field switch and must
            # not be read as a Python escape sequence.
            makeelement(r, 'w:instrText').text = r' TOC \h '
            # The instruction has significant leading/trailing spaces
            r[0].set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
            r = makeelement(p, 'w:r')
            makeelement(r, 'w:fldChar', w_fldCharType='separate')
        hl = makeelement(p, 'w:hyperlink', w_anchor=self.bmark)
        r = makeelement(hl, 'w:r')
        rpr = makeelement(r, 'w:rPr')
        makeelement(rpr, 'w:color', w_val='0000FF', w_themeColor='hyperlink')
        makeelement(rpr, 'w:u', w_val='single')
        makeelement(r, 'w:t').text = self.title
        if self.is_last:
            # Close the field opened by the first entry
            r = makeelement(p, 'w:r')
            makeelement(r, 'w:fldChar', w_fldCharType='end')
        body.insert(0, p)
|
||||
|
||||
def sanitize_bookmark_name(base):
    """Return *base* reduced to ASCII letters, digits and underscores.

    The text is first passed through ascii_text(); every character of the
    result that is not in ``[0-9a-zA-Z]`` is then replaced by ``_``.
    """
    ascii_form = ascii_text(base)
    return re.sub(r'[^0-9a-zA-Z]', '_', ascii_form)
|
||||
|
||||
|
||||
class LinksManager(object):
|
||||
|
||||
def __init__(self, namespace, document_relationships):
|
||||
self.namespace = namespace
|
||||
self.docment_relationships = document_relationships
|
||||
self.document_relationships = document_relationships
|
||||
self.top_anchor = type('')(uuid4().hex)
|
||||
self.anchor_map = {}
|
||||
self.used_bookmark_names = set()
|
||||
self.bmark_id = 0
|
||||
self.document_hrefs = set()
|
||||
self.external_links = {}
|
||||
self.toc = []
|
||||
|
||||
def bookmark_for_anchor(self, anchor, current_item, html_tag):
|
||||
key = (current_item.href, anchor)
|
||||
@ -44,10 +81,11 @@ class LinksManager(object):
|
||||
self.document_hrefs.add(current_item.href)
|
||||
else:
|
||||
name = start_text(html_tag).strip() or anchor
|
||||
name = sanitize_bookmark_name(name)
|
||||
i, bname = 0, name
|
||||
while name in self.used_bookmark_names:
|
||||
i += 1
|
||||
name = bname + (' %d' % i)
|
||||
name = bname + ('_%d' % i)
|
||||
self.anchor_map[key] = name
|
||||
return name
|
||||
|
||||
@ -71,6 +109,47 @@ class LinksManager(object):
|
||||
return self.namespace.makeelement(parent, 'w:hyperlink', w_anchor=bmark, w_tooltip=tooltip or '')
|
||||
if purl.scheme in {'http', 'https', 'ftp'}:
|
||||
if url not in self.external_links:
|
||||
self.external_links[url] = self.docment_relationships.add_relationship(url, self.namespace.names['LINKS'], target_mode='External')
|
||||
self.external_links[url] = self.document_relationships.add_relationship(url, self.namespace.names['LINKS'], target_mode='External')
|
||||
return self.namespace.makeelement(parent, 'w:hyperlink', r_id=self.external_links[url], w_tooltip=tooltip or '')
|
||||
return parent
|
||||
|
||||
def process_toc_node(self, toc, level=0):
    """Record a TOCItem for *toc* if it points into the document, then recurse.

    :param toc: ToC node exposing ``href`` and ``title``; iterating it
        yields its child nodes.
    :param level: Nesting depth stored on the created item; children are
        processed with ``level + 1``.
    """
    target = toc.href
    if target:
        parsed = urlparse(target)
        path = parsed.path
        if path in self.document_hrefs:
            top_key = (path, self.top_anchor)
            wanted_key = (path, parsed.fragment or self.top_anchor)
            # Fall back to the bookmark recorded for the top of the file
            # when the specific anchor has no bookmark.
            key = wanted_key if wanted_key in self.anchor_map else top_key
            self.toc.append(TOCItem(toc.title, self.anchor_map[key], level))
    # Children are visited even when this node itself produced no entry
    for child in toc:
        self.process_toc_node(child, level + 1)
|
||||
|
||||
def process_toc_links(self, oeb):
    """Rebuild ``self.toc`` from ``oeb.toc`` and flag its first and last items.

    ``self.toc`` is always reset to an empty list. Nothing further happens
    unless ``oeb.toc`` is truthy and its ``count()`` is greater than one.
    """
    self.toc = []
    root = oeb.toc
    if not (root and root.count() > 1):
        return
    for node in root:
        self.process_toc_node(node)
    entries = self.toc
    if entries:
        # TOCItem.serialize() uses these flags to open and close the field
        entries[0].is_first = True
        entries[-1].is_last = True
|
||||
|
||||
def serialize_toc(self, body, primary_heading_style):
    """Insert the collected ToC entries and a heading at the start of *body*.

    :param body: Element whose first child is the first content block.
    :param primary_heading_style: Style whose ``id`` is applied to the
        heading paragraph, or None to add no ``w:pStyle`` to it.
    """
    # Must run before anything is inserted, while body[0] is still the
    # original first block.
    # NOTE(review): the expression starts with '//', so the match may not be
    # limited to body[0] -- confirm against the XPath implementation in use.
    first_break = body[0].xpath('//*[local-name()="pageBreakBefore"]')[0]
    first_break.set('{%s}val' % self.namespace.namespaces['w'], 'on')

    makeelement = self.namespace.makeelement
    # Each entry inserts itself at index 0, so walk the list backwards to
    # end up with the entries in their original order.
    for entry in self.toc[::-1]:
        entry.serialize(body, makeelement)

    heading_text = __('Table of Contents')
    heading = makeelement(body, 'w:p', append=False)
    heading_props = makeelement(heading, 'w:pPr')
    if primary_heading_style is not None:
        makeelement(heading_props, 'w:pStyle', w_val=primary_heading_style.id)
    makeelement(heading_props, 'w:pageBreakBefore', w_val='off')
    run = makeelement(heading, 'w:r')
    makeelement(run, 'w:t').text = heading_text
    # Inserted last so the heading sits above all the entries
    body.insert(0, heading)
|
||||
|
@ -593,6 +593,7 @@ class StylesManager(object):
|
||||
heading_style.outline_level = i
|
||||
|
||||
snum = len(str(max(1, len(counts) - 1)))
|
||||
heading_styles = []
|
||||
for i, (style, count) in enumerate(counts.most_common()):
|
||||
if i == 0:
|
||||
self.normal_style = style
|
||||
@ -602,6 +603,7 @@ class StylesManager(object):
|
||||
val = 'Para %0{}d'.format(snum) % i
|
||||
else:
|
||||
val = 'Heading %d' % (style.outline_level + 1)
|
||||
heading_styles.append(style)
|
||||
style.id = style.name = val
|
||||
style.seq = i
|
||||
self.combined_styles = sorted(counts.iterkeys(), key=attrgetter('seq'))
|
||||
@ -609,6 +611,17 @@ class StylesManager(object):
|
||||
self.log.debug('%d Text Styles %d Combined styles' % tuple(map(len, (
|
||||
self.text_styles, self.combined_styles))))
|
||||
|
||||
self.primary_heading_style = None
|
||||
if heading_styles:
|
||||
heading_styles.sort(key=attrgetter('outline_level'))
|
||||
self.primary_heading_style = heading_styles[0]
|
||||
else:
|
||||
ms = 0
|
||||
for s in self.combined_styles:
|
||||
if s.rs.font_size > ms:
|
||||
self.primary_heading_style = s
|
||||
ms = s.rs.font_size
|
||||
|
||||
def serialize(self, styles):
|
||||
lang = styles.xpath('descendant::*[local-name()="lang"]')[0]
|
||||
for k in tuple(lang.attrib):
|
||||
|
@ -19,7 +19,7 @@ class PluginWidget(Widget, Ui_Form):
|
||||
|
||||
def __init__(self, parent, get_option, get_help, db=None, book_id=None):
|
||||
Widget.__init__(self, parent, [
|
||||
'docx_page_size', 'docx_custom_page_size', 'docx_no_cover',
|
||||
'docx_page_size', 'docx_custom_page_size', 'docx_no_cover', 'docx_no_toc',
|
||||
])
|
||||
for x in get_option('docx_page_size').option.choices:
|
||||
self.opt_docx_page_size.addItem(x)
|
||||
|
@ -47,13 +47,20 @@
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0" colspan="2">
|
||||
<item row="3" column="0" colspan="2">
|
||||
<widget class="QCheckBox" name="opt_docx_no_cover">
|
||||
<property name="text">
|
||||
<string>Do not insert &cover as image at start of document</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="0" colspan="2">
|
||||
<widget class="QCheckBox" name="opt_docx_no_toc">
|
||||
<property name="text">
|
||||
<string>Do not insert the &Table of Contents as a page at the start of the document</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<resources/>
|
||||
|
Loading…
x
Reference in New Issue
Block a user