diff --git a/src/calibre/ebooks/docx/footnotes.py b/src/calibre/ebooks/docx/footnotes.py new file mode 100644 index 0000000000..017ae160f4 --- /dev/null +++ b/src/calibre/ebooks/docx/footnotes.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + +from collections import OrderedDict + +from calibre.ebooks.docx.names import get, XPath, descendants + +class Note(object): + + def __init__(self, parent): + self.type = get(parent, 'w:type', 'normal') + self.parent = parent + + def __iter__(self): + for p in descendants(self.parent, 'w:p'): + yield p + +class Footnotes(object): + + def __init__(self): + self.footnotes = {} + self.endnotes = {} + self.counter = 0 + self.notes = OrderedDict() + + def __call__(self, footnotes, endnotes): + if footnotes is not None: + for footnote in XPath('./w:footnote[@w:id]')(footnotes): + fid = get(footnote, 'w:id') + if fid: + self.footnotes[fid] = Note(footnote) + + if endnotes is not None: + for endnote in XPath('./w:endnote[@w:id]')(endnotes): + fid = get(endnote, 'w:id') + if fid: + self.endnotes[fid] = Note(endnote) + + def get_ref(self, ref): + fid = get(ref, 'w:id') + notes = self.footnotes if ref.tag.endswith('}footnoteReference') else self.endnotes + note = notes.get(fid, None) + if note is not None and note.type == 'normal': + self.counter += 1 + anchor = 'note_%d' % self.counter + self.notes[anchor] = (type('')(self.counter), note) + return anchor, type('')(self.counter) + return None, None + + def __iter__(self): + for anchor, (counter, note) in self.notes.iteritems(): + yield anchor, counter, note + + @property + def has_notes(self): + return bool(self.notes) + diff --git a/src/calibre/ebooks/docx/names.py b/src/calibre/ebooks/docx/names.py index 4e9c34cfc7..0f4e6155b1 100644 --- a/src/calibre/ebooks/docx/names.py +++ b/src/calibre/ebooks/docx/names.py @@ -21,6 +21,8 @@ NUMBERING = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships FONTS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable' IMAGES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image' LINKS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink' +FOOTNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes' +ENDNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes' namespaces = { 'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main', diff --git a/src/calibre/ebooks/docx/styles.py b/src/calibre/ebooks/docx/styles.py index 2816156a67..c96f1260e0 100644 --- a/src/calibre/ebooks/docx/styles.py +++ b/src/calibre/ebooks/docx/styles.py @@ -384,6 +384,16 @@ class Styles(object): p { text-indent: 1.5em } ul, ol, p { margin: 0; padding: 0 } + + sup.noteref a { text-decoration: none } + + h1.notes-header { page-break-before: always } + + dl.notes dt { font-size: large } + + dl.notes dt a { text-decoration: none } + + dl.notes dd { page-break-after: always } ''') % (self.body_font_family, self.body_font_size) if ef: prefix = ef + '\n' + prefix diff --git a/src/calibre/ebooks/docx/to_html.py b/src/calibre/ebooks/docx/to_html.py index c654d6c6bf..698b5d01ea 100644 --- a/src/calibre/ebooks/docx/to_html.py +++ b/src/calibre/ebooks/docx/to_html.py @@ -11,16 +11,17 @@ from collections import OrderedDict, defaultdict from lxml import html from lxml.html.builder import ( - HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, HR) + HTML, HEAD, TITLE, BODY, LINK, META, P, SPAN, BR, DIV, SUP, A, DT, DL, DD, H1) from calibre.ebooks.docx.container import DOCX, fromstring from calibre.ebooks.docx.names import ( XPath, is_tag, XML, STYLES, NUMBERING, FONTS, get, generate_anchor, - descendants, ancestor) + descendants, ancestor, FOOTNOTES, ENDNOTES) from calibre.ebooks.docx.styles import Styles, inherit, PageProperties from calibre.ebooks.docx.numbering import Numbering from calibre.ebooks.docx.fonts import Fonts from calibre.ebooks.docx.images import Images +from calibre.ebooks.docx.footnotes import Footnotes from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1 class Text: @@ -34,9 +35,10 @@ class Text: class Convert(object): - def __init__(self, path_or_stream, dest_dir=None, log=None): + def __init__(self, path_or_stream, dest_dir=None, log=None, notes_text=None): self.docx = DOCX(path_or_stream, log=log) self.log = self.docx.log + self.notes_text = notes_text or _('Notes') self.dest_dir = dest_dir or os.getcwdu() self.mi = self.docx.metadata self.body = BODY() @@ -81,6 +83,20 @@ class Convert(object): p = self.convert_p(wp) self.body.append(p) + if self.footnotes.has_notes: + dl = DL() + dl.set('class', 'notes') + self.body.append(H1(self.notes_text)) + self.body[-1].set('class', 'notes-header') + self.body.append(dl) + for anchor, text, note in self.footnotes: + dl.append(DT('[', A('←' + text, href='#back_%s' % anchor, title=text), id=anchor)) + dl[-1][0].tail = ']' + dl.append(DD()) + for wp in note: + p = self.convert_p(wp) + dl[-1].append(p) + self.resolve_links(relationships_by_id) # TODO: tables child of (nested tables?) @@ -154,9 +170,25 @@ class Convert(object): nname = get_name(NUMBERING, 'numbering.xml') sname = get_name(STYLES, 'styles.xml') fname = get_name(FONTS, 'fontTable.xml') + foname = get_name(FOOTNOTES, 'footnotes.xml') + enname = get_name(ENDNOTES, 'endnotes.xml') numbering = self.numbering = Numbering() + footnotes = self.footnotes = Footnotes() fonts = self.fonts = Fonts() + foraw = enraw = None + if foname is not None: + try: + foraw = self.docx.read(foname) + except KeyError: + self.log.warn('Footnotes %s do not exist' % foname) + if enname is not None: + try: + enraw = self.docx.read(enname) + except KeyError: + self.log.warn('Endnotes %s do not exist' % enname) + footnotes(fromstring(foraw) if foraw else None, fromstring(enraw) if enraw else None) + if fname is not None: embed_relationships = self.docx.get_relationships(fname)[0] try: @@ -327,9 +359,13 @@ class Convert(object): for img in self.images.to_html(child, self.current_page, self.docx, self.dest_dir): text.add_elem(img) ans.append(text.elem) - elif is_tag(child, 'w:continuationSeparator'): - text.add_elem(HR()) - ans.append(text.elem) + elif is_tag(child, 'w:footnoteReference') or is_tag(child, 'w:endnoteReference'): + anchor, name = self.footnotes.get_ref(child) + if anchor and name: + l = SUP(A(name, href='#' + anchor, title=name), id='back_%s' % anchor) + l.set('class', 'noteref') + text.add_elem(l) + ans.append(text.elem) if text.buf: setattr(text.elem, text.attr, ''.join(text.buf))