mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX: Merge runs with the same styling to reduce amount of generated
markup
This commit is contained in:
parent
0cc7a43ded
commit
83b0ef58fe
59
src/calibre/ebooks/docx/cleanup.py
Normal file
59
src/calibre/ebooks/docx/cleanup.py
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=utf-8
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||||
|
|
||||||
|
|
||||||
|
def mergeable(previous, current):
    '''
    Return True if ``current`` can be folded into ``previous``.

    Merging is refused when either element has tail text, when their
    ``class`` attributes differ, or when ``current`` has a non-empty ``id``
    attribute (presumably so the id stays available as a link target --
    TODO confirm). Otherwise the two are mergeable only if ``current`` is
    the sibling that immediately follows ``previous``.
    '''
    if (
        previous.tail or current.tail
        or previous.get('class', None) != current.get('class', None)
        or current.get('id', False)
    ):
        return False
    # The default of None covers the "previous has no following sibling" case
    following = next(previous.itersiblings(), None)
    return following is current
|
||||||
|
|
||||||
|
|
||||||
|
def append_text(parent, text):
    '''
    Append ``text`` to the end of the content of ``parent``.

    If ``parent`` has no children the text is added to ``parent.text``,
    otherwise it is added to the tail of the last child.
    '''
    if len(parent) == 0:
        parent.text = (parent.text or '') + text
        return
    last_child = parent[-1]
    last_child.tail = (last_child.tail or '') + text
|
||||||
|
|
||||||
|
|
||||||
|
def merge(parent, span):
    '''
    Move the entire content of ``span`` to the end of ``parent`` and then
    remove ``span`` from its own parent.

    The leading text of ``span``, its children (in order) and finally any
    tail text of ``span`` are appended to the content of ``parent``.
    '''
    if span.text:
        append_text(parent, span.text)
    # Iterate over a snapshot: appending a child to ``parent`` takes it out
    # of ``span``, and mutating a container while iterating over it directly
    # can cause children to be skipped.
    for child in list(span):
        parent.append(child)
    if span.tail:
        append_text(parent, span.tail)
    span.getparent().remove(span)
|
||||||
|
|
||||||
|
|
||||||
|
def merge_run(run):
    '''
    Collapse a run of elements into its first element.

    Every element after the first is merged, in order, into ``run[0]``.
    '''
    target, remainder = run[0], run[1:]
    for extra in remainder:
        merge(target, extra)
|
||||||
|
|
||||||
|
|
||||||
|
def cleanup_markup(root, styles):
    '''
    Reduce the amount of generated markup by merging consecutive ``<span>``
    elements that have the same styling.

    Spans are visited in the order returned by ``root.xpath('//span')``. A
    run grows for as long as each new span is mergeable with the last span
    of the run; when it is not, the run collected so far is merged into its
    first span and a new run is started.

    ``styles`` is accepted as part of the interface but is not used here.
    '''
    current_run = []
    for span in root.xpath('//span'):
        if current_run and mergeable(current_run[-1], span):
            current_run.append(span)
            continue
        # The run (if any) is broken by this span: merge it and start over
        if len(current_run) > 1:
            merge_run(current_run)
        current_run = [span]
    # Flush the final run. Without this, a mergeable run that ends at the
    # last span in the document would never be merged.
    if len(current_run) > 1:
        merge_run(current_run)
|
@ -23,6 +23,7 @@ from calibre.ebooks.docx.fonts import Fonts
|
|||||||
from calibre.ebooks.docx.images import Images
|
from calibre.ebooks.docx.images import Images
|
||||||
from calibre.ebooks.docx.tables import Tables
|
from calibre.ebooks.docx.tables import Tables
|
||||||
from calibre.ebooks.docx.footnotes import Footnotes
|
from calibre.ebooks.docx.footnotes import Footnotes
|
||||||
|
from calibre.ebooks.docx.cleanup import cleanup_markup
|
||||||
from calibre.ebooks.metadata.opf2 import OPFCreator
|
from calibre.ebooks.metadata.opf2 import OPFCreator
|
||||||
from calibre.ebooks.metadata.toc import TOC
|
from calibre.ebooks.metadata.toc import TOC
|
||||||
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
|
from calibre.ebooks.oeb.polish.toc import elem_to_toc_text
|
||||||
@ -157,6 +158,8 @@ class Convert(object):
|
|||||||
notes_header.set('class', '%s notes-header' % cls)
|
notes_header.set('class', '%s notes-header' % cls)
|
||||||
break
|
break
|
||||||
|
|
||||||
|
cleanup_markup(self.html, self.styles)
|
||||||
|
|
||||||
return self.write()
|
return self.write()
|
||||||
|
|
||||||
def read_page_properties(self, doc):
|
def read_page_properties(self, doc):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user