mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 18:54:09 -04:00
DOCX: Support for footnotes and endnotes
This commit is contained in:
parent
997dcae358
commit
3566c2e5cb
62
src/calibre/ebooks/docx/footnotes.py
Normal file
62
src/calibre/ebooks/docx/footnotes.py
Normal file
@ -0,0 +1,62 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
from __future__ import (unicode_literals, division, absolute_import,
|
||||
print_function)
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from calibre.ebooks.docx.names import get, XPath, descendants
|
||||
|
||||
class Note(object):
    """Wrapper around a single note element (footnote or endnote).

    Iterating over a Note yields the ``w:p`` descendants of the wrapped
    element, in the order produced by ``descendants()``.
    """

    def __init__(self, parent):
        # The element this note wraps; iteration walks its descendants.
        self.parent = parent
        # Value of the element's w:type attribute; 'normal' is passed to
        # get() as the third argument (presumably the fallback used when the
        # attribute is absent -- confirm against names.get).
        self.type = get(parent, 'w:type', 'normal')

    def __iter__(self):
        paragraphs = descendants(self.parent, 'w:p')
        for para in paragraphs:
            yield para
|
||||
|
||||
class Footnotes(object):
    """Registry of the footnotes and endnotes of a document.

    Usage, as visible from this class:

    1. Call the instance with the parsed footnotes/endnotes roots to index
       every note by its ``w:id``.
    2. Call :meth:`get_ref` for each note reference met while converting the
       body; every successful lookup is given a sequential number and an
       anchor name.
    3. Iterate over the instance to get the referenced notes back, in the
       order in which they were referenced.
    """

    def __init__(self):
        self.footnotes = {}  # w:id -> Note, for footnotes
        self.endnotes = {}  # w:id -> Note, for endnotes
        self.counter = 0  # number handed out to the last referenced note
        # anchor -> (counter as text, Note), in order of first reference
        self.notes = OrderedDict()

    def __call__(self, footnotes, endnotes):
        """Index the notes found under the given root elements.

        :param footnotes: root element of the footnotes part, or None
        :param endnotes: root element of the endnotes part, or None

        Elements without a non-empty ``w:id`` are ignored.
        """
        sources = (
            (footnotes, './w:footnote[@w:id]', self.footnotes),
            (endnotes, './w:endnote[@w:id]', self.endnotes),
        )
        for root, expr, dest in sources:
            if root is None:
                continue
            for elem in XPath(expr)(root):
                note_id = get(elem, 'w:id')
                if note_id:
                    dest[note_id] = Note(elem)

    def get_ref(self, ref):
        """Register a reference to a note and return ``(anchor, label)``.

        :param ref: a footnoteReference or endnoteReference element; which of
            the two tables is searched is decided by the element's tag.
        :return: ``(anchor, label)`` where anchor is ``'note_<n>'`` and label
            is the running number as text, or ``(None, None)`` when the
            referenced note is unknown or its type is not ``'normal'``.

        Every successful call consumes a new number, so a note referenced
        twice is listed twice.
        """
        note_id = get(ref, 'w:id')
        if ref.tag.endswith('}footnoteReference'):
            table = self.footnotes
        else:
            table = self.endnotes
        note = table.get(note_id, None)
        if note is None or note.type != 'normal':
            return None, None
        self.counter += 1
        anchor = 'note_%d' % self.counter
        # type('') is the text type of this module (unicode under
        # unicode_literals on Python 2, str on Python 3).
        label = type('')(self.counter)
        self.notes[anchor] = (label, note)
        return anchor, label

    def __iter__(self):
        """Yield ``(anchor, label, note)`` for every referenced note."""
        # items() rather than the Python-2-only iteritems(), so that
        # iteration also works on Python 3.
        for anchor, (counter, note) in self.notes.items():
            yield anchor, counter, note

    @property
    def has_notes(self):
        """True if at least one note has been referenced via get_ref()."""
        return bool(self.notes)
|
||||
|
@ -21,6 +21,8 @@ NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships
|
||||
FONTS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable'
|
||||
IMAGES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image'
|
||||
LINKS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink'
|
||||
FOOTNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes'
|
||||
ENDNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes'
|
||||
|
||||
namespaces = {
|
||||
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
||||
|
@ -384,6 +384,16 @@ class Styles(object):
|
||||
p { text-indent: 1.5em }
|
||||
|
||||
ul, ol, p { margin: 0; padding: 0 }
|
||||
|
||||
sup.noteref a { text-decoration: none }
|
||||
|
||||
h1.notes-header { page-break-before: always }
|
||||
|
||||
dl.notes dt { font-size: large }
|
||||
|
||||
dl.notes dt a { text-decoration: none }
|
||||
|
||||
dl.notes dd { page-break-after: always }
|
||||
''') % (self.body_font_family, self.body_font_size)
|
||||
if ef:
|
||||
prefix = ef + '\n' + prefix
|
||||
|
@ -11,16 +11,17 @@ from collections import OrderedDict, defaultdict
|
||||
|
||||
from lxml import html
|
||||
from lxml.html.builder import (
|
||||
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, HR)
|
||||
HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, SUP, A, DT, DL, DD, H1)
|
||||
|
||||
from calibre.ebooks.docx.container import DOCX, fromstring
|
||||
from calibre.ebooks.docx.names import (
|
||||
XPath, is_tag, XML, STYLES, NUMBERING, FONTS, get, generate_anchor,
|
||||
descendants, ancestor)
|
||||
descendants, ancestor, FOOTNOTES, ENDNOTES)
|
||||
from calibre.ebooks.docx.styles import Styles, inherit, PageProperties
|
||||
from calibre.ebooks.docx.numbering import Numbering
|
||||
from calibre.ebooks.docx.fonts import Fonts
|
||||
from calibre.ebooks.docx.images import Images
|
||||
from calibre.ebooks.docx.footnotes import Footnotes
|
||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||
|
||||
class Text:
|
||||
@ -34,9 +35,10 @@ class Text:
|
||||
|
||||
class Convert(object):
|
||||
|
||||
def __init__(self, path_or_stream, dest_dir=None, log=None):
|
||||
def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None):
|
||||
self.docx = DOCX(path_or_stream, log=log)
|
||||
self.log = self.docx.log
|
||||
self.notes_text = notes_text or _('Notes')
|
||||
self.dest_dir = dest_dir or os.getcwdu()
|
||||
self.mi = self.docx.metadata
|
||||
self.body = BODY()
|
||||
@ -81,6 +83,20 @@ class Convert(object):
|
||||
p = self.convert_p(wp)
|
||||
self.body.append(p)
|
||||
|
||||
if self.footnotes.has_notes:
|
||||
dl = DL()
|
||||
dl.set('class', 'notes')
|
||||
self.body.append(H1(self.notes_text))
|
||||
self.body[-1].set('class', 'notes-header')
|
||||
self.body.append(dl)
|
||||
for anchor, text, note in self.footnotes:
|
||||
dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor))
|
||||
dl[-1][0].tail = ']'
|
||||
dl.append(DD())
|
||||
for wp in note:
|
||||
p = self.convert_p(wp)
|
||||
dl[-1].append(p)
|
||||
|
||||
self.resolve_links(relationships_by_id)
|
||||
# TODO: tables <w:tbl> child of <w:body> (nested tables?)
|
||||
|
||||
@ -154,9 +170,25 @@ class Convert(object):
|
||||
nname = get_name(NUMBERING, 'numbering.xml')
|
||||
sname = get_name(STYLES, 'styles.xml')
|
||||
fname = get_name(FONTS, 'fontTable.xml')
|
||||
foname = get_name(FOOTNOTES, 'footnotes.xml')
|
||||
enname = get_name(ENDNOTES, 'endnotes.xml')
|
||||
numbering = self.numbering = Numbering()
|
||||
footnotes = self.footnotes = Footnotes()
|
||||
fonts = self.fonts = Fonts()
|
||||
|
||||
foraw = enraw = None
|
||||
if foname is not None:
|
||||
try:
|
||||
foraw = self.docx.read(foname)
|
||||
except KeyError:
|
||||
self.log.warn('Footnotes %s do not exist' % foname)
|
||||
if enname is not None:
|
||||
try:
|
||||
enraw = self.docx.read(enname)
|
||||
except KeyError:
|
||||
self.log.warn('Endnotes %s do not exist' % enname)
|
||||
footnotes(fromstring(foraw) if foraw else None, fromstring(enraw) if enraw else None)
|
||||
|
||||
if fname is not None:
|
||||
embed_relationships = self.docx.get_relationships(fname)[0]
|
||||
try:
|
||||
@ -327,9 +359,13 @@ class Convert(object):
|
||||
for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir):
|
||||
text.add_elem(img)
|
||||
ans.append(text.elem)
|
||||
elif is_tag(child, 'w:continuationSeparator'):
|
||||
text.add_elem(HR())
|
||||
ans.append(text.elem)
|
||||
elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'):
|
||||
anchor, name = self.footnotes.get_ref(child)
|
||||
if anchor and name:
|
||||
l = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor)
|
||||
l.set('class', 'noteref')
|
||||
text.add_elem(l)
|
||||
ans.append(text.elem)
|
||||
if text.buf:
|
||||
setattr(text.elem, text.attr, ''.join(text.buf))
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user