mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Input: Convert tabs to non-breaking spaces
DOCX Input: Convert tabs in the docx file into non-breaking spaces in the output document. Note that custom tab stops are not supported. Fixes #1228893 [Converting .docx file fails to preserve para first line indent](https://bugs.launchpad.net/calibre/+bug/1228893)
This commit is contained in:
parent
2c46d54c47
commit
a4f6d6d19e
@ -23,6 +23,7 @@ LINKS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships
|
|||||||
FOOTNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes'
|
FOOTNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes'
|
||||||
ENDNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes'
|
ENDNOTES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes'
|
||||||
THEMES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme'
|
THEMES = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme'
|
||||||
|
SETTINGS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings'
|
||||||
|
|
||||||
namespaces = {
|
namespaces = {
|
||||||
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
|
||||||
|
20
src/calibre/ebooks/docx/settings.py
Normal file
20
src/calibre/ebooks/docx/settings.py
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
from calibre.ebooks.docx.names import XPath, get
|
||||||
|
|
||||||
|
class Settings(object):
    '''
    Holds values extracted from a parsed XML tree. Currently only the
    default tab stop is extracted.

    NOTE(review): callers appear to pass the root of the DOCX settings part;
    confirm against the call site.
    '''

    def __init__(self, root):
        '''
        :param root: Parsed XML tree that is searched for
                     ``w:defaultTabStop`` elements carrying a ``w:val``
                     attribute.
        '''
        # Fallback used when the tree has no usable w:defaultTabStop element.
        # The division by 20 presumably converts twentieths of a point into
        # points (720 -> 36) -- TODO confirm against the OOXML specification.
        self.default_tab_stop = 720 / 20

        find_tab_stops = XPath('//w:defaultTabStop[@w:val]')
        for node in find_tab_stops(root):
            try:
                raw_value = int(get(node, 'w:val'))
            except (ValueError, TypeError, AttributeError):
                # Unparseable value: keep whatever was set so far
                continue
            # Later matches override earlier ones
            self.default_tab_stop = raw_value / 20
|
||||||
|
|
@ -458,6 +458,8 @@ class Styles(object):
|
|||||||
|
|
||||||
dl.notes dd:last-of-type { page-break-after: avoid }
|
dl.notes dd:last-of-type { page-break-after: avoid }
|
||||||
|
|
||||||
|
span.tab { white-space: pre }
|
||||||
|
|
||||||
''') % (self.body_font_family, self.body_font_size, self.body_color)
|
''') % (self.body_font_family, self.body_font_size, self.body_color)
|
||||||
if ef:
|
if ef:
|
||||||
prefix = ef + '\n' + prefix
|
prefix = ef + '\n' + prefix
|
||||||
|
@ -6,7 +6,7 @@ from __future__ import (unicode_literals, division, absolute_import,
|
|||||||
__license__ = 'GPL v3'
|
__license__ = 'GPL v3'
|
||||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
import sys, os, re
|
import sys, os, re, math
|
||||||
from collections import OrderedDict, defaultdict
|
from collections import OrderedDict, defaultdict
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
@ -16,7 +16,7 @@ from lxml.html.builder import (
|
|||||||
from calibre.ebooks.docx.container import DOCX, fromstring
|
from calibre.ebooks.docx.container import DOCX, fromstring
|
||||||
from calibre.ebooks.docx.names import (
|
from calibre.ebooks.docx.names import (
|
||||||
XPath, is_tag, XML, STYLES, NUMBERING, FONTS, get, generate_anchor,
|
XPath, is_tag, XML, STYLES, NUMBERING, FONTS, get, generate_anchor,
|
||||||
descendants, FOOTNOTES, ENDNOTES, children, THEMES)
|
descendants, FOOTNOTES, ENDNOTES, children, THEMES, SETTINGS)
|
||||||
from calibre.ebooks.docx.styles import Styles, inherit, PageProperties
|
from calibre.ebooks.docx.styles import Styles, inherit, PageProperties
|
||||||
from calibre.ebooks.docx.numbering import Numbering
|
from calibre.ebooks.docx.numbering import Numbering
|
||||||
from calibre.ebooks.docx.fonts import Fonts
|
from calibre.ebooks.docx.fonts import Fonts
|
||||||
@ -27,6 +27,7 @@ from calibre.ebooks.docx.cleanup import cleanup_markup
|
|||||||
from calibre.ebooks.docx.theme import Theme
|
from calibre.ebooks.docx.theme import Theme
|
||||||
from calibre.ebooks.docx.toc import create_toc
|
from calibre.ebooks.docx.toc import create_toc
|
||||||
from calibre.ebooks.docx.fields import Fields
|
from calibre.ebooks.docx.fields import Fields
|
||||||
|
from calibre.ebooks.docx.settings import Settings
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
from calibre.utils.localization import canonicalize_lang, lang_as_iso639_1
|
||||||
|
|
||||||
@ -227,6 +228,7 @@ class Convert(object):
|
|||||||
|
|
||||||
nname = get_name(NUMBERING, 'numbering.xml')
|
nname = get_name(NUMBERING, 'numbering.xml')
|
||||||
sname = get_name(STYLES, 'styles.xml')
|
sname = get_name(STYLES, 'styles.xml')
|
||||||
|
sename = get_name(SETTINGS, 'settings.xml')
|
||||||
fname = get_name(FONTS, 'fontTable.xml')
|
fname = get_name(FONTS, 'fontTable.xml')
|
||||||
tname = get_name(THEMES, 'theme1.xml')
|
tname = get_name(THEMES, 'theme1.xml')
|
||||||
foname = get_name(FOOTNOTES, 'footnotes.xml')
|
foname = get_name(FOOTNOTES, 'footnotes.xml')
|
||||||
@ -237,6 +239,14 @@ class Convert(object):
|
|||||||
|
|
||||||
foraw = enraw = None
|
foraw = enraw = None
|
||||||
forel, enrel = ({}, {}), ({}, {})
|
forel, enrel = ({}, {}), ({}, {})
|
||||||
|
if sename is not None:
|
||||||
|
try:
|
||||||
|
seraw = self.docx.read(sename)
|
||||||
|
except KeyError:
|
||||||
|
self.log.warn('Settings %s do not exist' % sename)
|
||||||
|
else:
|
||||||
|
self.settings = Settings(fromstring(seraw))
|
||||||
|
|
||||||
if foname is not None:
|
if foname is not None:
|
||||||
try:
|
try:
|
||||||
foraw = self.docx.read(foname)
|
foraw = self.docx.read(foname)
|
||||||
@ -538,6 +548,11 @@ class Convert(object):
|
|||||||
l.set('class', 'noteref')
|
l.set('class', 'noteref')
|
||||||
text.add_elem(l)
|
text.add_elem(l)
|
||||||
ans.append(text.elem)
|
ans.append(text.elem)
|
||||||
|
elif is_tag(child, 'w:tab'):
|
||||||
|
spaces = int(math.ceil((self.settings.default_tab_stop / 36) * 6))
|
||||||
|
text.add_elem(SPAN(NBSP * spaces))
|
||||||
|
ans.append(text.elem)
|
||||||
|
ans[-1].set('class', 'tab')
|
||||||
if text.buf:
|
if text.buf:
|
||||||
setattr(text.elem, text.attr, ''.join(text.buf))
|
setattr(text.elem, text.attr, ''.join(text.buf))
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user