mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Initial implementation of pretty printing
This commit is contained in:
parent
27c041ee62
commit
10108a8772
@ -393,7 +393,7 @@ class Container(object): # {{{
|
||||
data, strip_encoding_pats=True, assume_utf8=True, resolve_entities=True)
|
||||
return etree.fromstring(data, parser=RECOVER_PARSER)
|
||||
|
||||
def parse_xhtml(self, data, fname):
|
||||
def parse_xhtml(self, data, fname='<string>'):
|
||||
if self.tweak_mode:
|
||||
return parse_html_tweak(data, log=self.log, decoder=self.decode)
|
||||
else:
|
||||
@ -423,7 +423,7 @@ class Container(object): # {{{
|
||||
ans = self.decode(ans)
|
||||
return ans
|
||||
|
||||
def parse_css(self, data, fname):
|
||||
def parse_css(self, data, fname='<string>'):
|
||||
from cssutils import CSSParser, log
|
||||
log.setLevel(logging.WARN)
|
||||
log.raiseExceptions = False
|
||||
|
@ -6,13 +6,188 @@ from __future__ import (unicode_literals, division, absolute_import,
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
import textwrap
|
||||
from future_builtins import map
|
||||
|
||||
from calibre.ebooks.oeb.polish.parsing import parse
|
||||
from calibre.ebooks.oeb.base import serialize, OEB_DOCS
|
||||
# from lxml.etree import Element
|
||||
|
||||
def fix_html(raw):
|
||||
root = parse(raw)
|
||||
return serialize(root, 'text/html').decode('utf-8')
|
||||
from calibre import force_unicode
|
||||
from calibre.ebooks.oeb.base import (
|
||||
serialize, OEB_DOCS, barename, OEB_STYLES, XPNSMAP, XHTML, SVG)
|
||||
from calibre.ebooks.oeb.polish.container import OPF_NAMESPACES, guess_type
|
||||
from calibre.utils.icu import sort_key
|
||||
|
||||
def isspace(x):
    ''' Return True if x is empty or consists only of tab, line feed, form
    feed, carriage return and space characters. '''
    remainder = x.strip('\u0009\u000a\u000c\u000d\u0020')
    return not remainder
|
||||
|
||||
def pretty_xml_tree(elem, level=0, indent=' '):
    ''' Recursively re-indent an XML tree in place.

    Assumes that elements with children carry no textual content and that no
    text immediately follows a closing tag. This holds for opf/ncx and
    container.xml files. If either assumption is violated there should be no
    data loss, but the result of pretty printing won't be optimal.

    elem is the element to indent, level its nesting depth and indent the
    string repeated once per nesting level. '''
    text = elem.text
    if text:
        reindent_text = isspace(text)
    else:
        # No text at all: only add indentation if there are children to indent
        reindent_text = len(elem) > 0
    if reindent_text:
        elem.text = '\n' + (indent * (level+1))

    last = len(elem) - 1
    for pos, child in enumerate(elem):
        pretty_xml_tree(child, level=level+1, indent=indent)
        tail = child.tail
        if tail and not isspace(tail):
            # Real text after the closing tag, leave it alone
            continue
        # The last child is followed by the closing tag of elem, which sits
        # one level shallower than the children
        depth = level if pos == last else level + 1
        child.tail = '\n' + (indent * depth)
|
||||
|
||||
def pretty_opf(root):
    ''' Reorder, in place, the children of the <metadata> and <manifest>
    elements of the OPF tree root. '''
    # Put all dc: tags first starting with title and author. Preserve order for
    # the rest (the sort is stable).
    dc_rank = {'title':0, 'creator':1}

    def dckey(x):
        return dc_rank.get(barename(x.tag), 2)

    for metadata in root.xpath('//opf:metadata', namespaces=OPF_NAMESPACES):
        dc_tags = sorted(
            metadata.xpath('./*[namespace-uri()="%s"]' % OPF_NAMESPACES['dc']),
            key=dckey)
        # Inserting at position 0 in reverse order leaves the tags at the
        # front of <metadata> in sorted order
        for tag in dc_tags[::-1]:
            metadata.insert(0, tag)

    # Group items in the manifest
    idrefs = root.xpath('//opf:spine/opf:itemref/@idref', namespaces=OPF_NAMESPACES)
    spine_ids = {idref:pos for pos, idref in enumerate(idrefs)}

    def manifest_category(mt, ext):
        # The order of these checks matters, the first match wins
        if mt in OEB_DOCS:
            return 0
        if mt == guess_type('a.ncx'):
            return 1
        if mt in OEB_STYLES:
            return 2
        if mt.startswith('image/'):
            return 3
        if ext in {'otf', 'ttf', 'woff'}:
            return 4
        if mt.startswith('audio/'):
            return 5
        if mt.startswith('video/'):
            return 6
        return 1000

    def manifest_key(x):
        mt = x.get('media-type', '')
        href = x.get('href', '')
        cat = manifest_category(mt, href.rpartition('.')[-1].lower())
        if cat == 0:
            # Documents are ordered by their position in the spine, documents
            # not in the spine come last
            return (cat, spine_ids.get(x.get('id', None), 1000000000))
        # Everything else is ordered by href within its category
        return (cat, sort_key(href))

    for manifest in root.xpath('//opf:manifest', namespaces=OPF_NAMESPACES):
        for item in reversed(sorted(manifest, key=manifest_key)):
            manifest.insert(0, item)
|
||||
|
||||
SVG_TAG = SVG('svg')

# The tags that isblock() treats as block level: the result of XHTML() applied
# to each of the names below, plus the svg tag
BLOCK_TAGS = frozenset(XHTML(tag_name) for tag_name in (
    'address', 'article', 'aside', 'audio', 'blockquote', 'body', 'canvas', 'dd',
    'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer', 'form',
    'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li',
    'noscript', 'ol', 'output', 'p', 'pre', 'script', 'section', 'style', 'table', 'tbody', 'td',
    'tfoot', 'thead', 'tr', 'ul', 'video')) | {SVG_TAG}
|
||||
|
||||
|
||||
def isblock(x):
    ''' Return True if x should be treated as a block level node. Nodes whose
    tag is callable or empty always count as blocks, all other nodes count
    only if their tag is in BLOCK_TAGS. '''
    tag = x.tag
    if callable(tag) or not tag:
        return True
    return tag in BLOCK_TAGS
|
||||
|
||||
def has_only_blocks(x):
    ''' Return True if x contains nothing but block level children, with at
    most whitespace before, between and after them.

    Returns False for a node with a string tag and no children, for a node
    whose leading text is not whitespace, and as soon as a child is found that
    is either not a block (see isblock()) or is followed by non-whitespace
    text. '''
    if hasattr(x.tag, 'split') and len(x) == 0:
        # Tag with no children, so there are no blocks in it
        return False
    if x.text and not isspace(x.text):
        return False
    for child in x:
        # Text after a child only disqualifies x if it is real content.
        # Whitespace-only tails are just formatting between blocks.
        if not isblock(child) or (child.tail and not isspace(child.tail)):
            return False
    return True
|
||||
|
||||
def indent_for_tag(x):
    ''' Return the whitespace that precedes x on its line, or None.

    The text in front of x is the tail of its previous sibling or, when x is
    the first child, the text of its parent. Whatever follows the last newline
    in that text is returned if it is whitespace only. None is returned when
    there is no such text or when it is not whitespace only. '''
    sibling = x.getprevious()
    if sibling is None:
        preceding = x.getparent().text
    else:
        preceding = sibling.tail
    if not preceding:
        return None
    last_line = preceding.rpartition('\n')[-1]
    if isspace(last_line):
        return last_line
    return None
|
||||
|
||||
def set_indent(elem, attr, indent):
    ''' Make the string stored in the attribute attr of elem (for example
    'text' or 'tail') end with a line consisting of indent.

    An empty or missing value is replaced by indent. Otherwise a whitespace
    only last line is replaced by indent, and indent is appended as a new line
    when the last line has content. '''
    current = getattr(elem, attr)
    if current:
        parts = current.splitlines()
        if isspace(parts[-1]):
            parts[-1] = indent
        else:
            parts.append(indent)
        current = '\n'.join(parts)
    else:
        current = indent
    setattr(elem, attr, current)
|
||||
|
||||
def pretty_block(parent, level=1, indent=' '):
    ''' Surround block tags with blank lines and recurse into child block tags
    that contain only other block tags.

    Inside tr, td and th tags a single newline is used as the separator
    instead of a blank line. svg children that are not recursed into as
    blocks are indented with pretty_xml_tree(). '''
    leading = parent.text
    if not leading or isspace(leading):
        leading = ''
    in_table = hasattr(parent.tag, 'strip') and barename(parent.tag) in {'tr', 'td', 'th'}
    nn = '\n' if in_table else '\n\n'
    parent.text = leading + nn + (indent * level)

    last = len(parent) - 1
    for pos, child in enumerate(parent):
        if isblock(child) and has_only_blocks(child):
            pretty_block(child, level=level+1, indent=indent)
        elif child.tag == SVG_TAG:
            pretty_xml_tree(child, level=level, indent=indent)
        # The last child is followed by the closing tag of parent, which is
        # indented one level less than the children
        depth = level - 1 if pos == last else level
        trailing = child.tail
        if not trailing or isspace(trailing):
            trailing = ''
        child.tail = trailing + nn + (indent * depth)
|
||||
|
||||
def pretty_html_tree(container, root):
    ''' Beautify, in place, the parsed HTML tree root.

    The children of root are separated by blank lines, the <head> is indented
    with pretty_xml_tree() and every <body> is processed with pretty_block().
    The contents of <script> and <style> tags are dedented and then re-indented
    to match the indentation of the tag itself. <style> contents are first
    passed through pretty_css(), for which container is needed. '''
    root.text = '\n\n'
    for child in root:
        child.tail = '\n\n'
        # Comments and processing instructions do not have string tags
        if hasattr(child.tag, 'endswith') and child.tag.endswith('}head'):
            pretty_xml_tree(child)
    for body in root.findall('h:body', namespaces=XPNSMAP):
        pretty_block(body)

    # Handle <script> and <style> tags
    for child in root.xpath('//*[local-name()="script" or local-name()="style"]'):
        if child.text:
            # indent_for_tag() returns None when the tag is not preceded by
            # whitespace only on its line. Use no indentation in that case,
            # as None can neither be prepended to the lines below nor be
            # used by set_indent().
            indent = indent_for_tag(child) or ''
            if child.tag.endswith('style'):
                child.text = force_unicode(pretty_css(container, '', child.text), 'utf-8')
            child.text = textwrap.dedent(child.text)
            child.text = '\n' + '\n'.join([(indent + x) if x else '' for x in child.text.splitlines()])
            # Put the closing tag at the same indentation as the opening tag
            set_indent(child, 'text', indent)
|
||||
|
||||
def fix_html(container, raw):
    ''' Parse raw with container.parse_xhtml() and return the result of
    serializing the parsed tree as text/html. '''
    return serialize(container.parse_xhtml(raw), 'text/html')
|
||||
|
||||
def pretty_html(container, name, raw):
    ''' Parse raw with container.parse_xhtml(), beautify the tree with
    pretty_html_tree() and return it serialized as text/html. name is not
    used. '''
    tree = container.parse_xhtml(raw)
    pretty_html_tree(container, tree)
    return serialize(tree, 'text/html')
|
||||
|
||||
def pretty_css(container, name, raw):
    ''' Parse raw with container.parse_css() and return the parsed sheet
    serialized as text/css. name is not used. '''
    return serialize(container.parse_css(raw), 'text/css')
|
||||
|
||||
def pretty_xml(container, name, raw):
    ''' Parse raw with container.parse_xml(), indent the tree with
    pretty_xml_tree() and return it serialized as text/xml. If name is the
    name of the OPF file of container, the tree is first reordered with
    pretty_opf(). '''
    tree = container.parse_xml(raw)
    is_opf = name == container.opf_name
    if is_opf:
        pretty_opf(tree)
    pretty_xml_tree(tree)
    return serialize(tree, 'text/xml')
|
||||
|
||||
def fix_all_html(container):
|
||||
for name, mt in container.mime_map.iteritems():
|
||||
@ -20,3 +195,24 @@ def fix_all_html(container):
|
||||
container.parsed(name)
|
||||
container.dirty(name)
|
||||
|
||||
def pretty_all(container):
    ''' Beautify every file in container.mime_map that is an HTML document,
    a stylesheet, the OPF, or has the mimetype guessed for .ncx or .xml
    files. Each such file is marked as dirty, all other files are left
    untouched. '''
    for name, mt in container.mime_map.iteritems():
        if mt in OEB_DOCS:
            pretty_html_tree(container, container.parsed(name))
        elif mt in OEB_STYLES:
            # Stylesheets are only parsed here, no tree is modified
            container.parsed(name)
        elif name == container.opf_name:
            opf = container.parsed(name)
            pretty_opf(opf)
            pretty_xml_tree(opf)
        elif mt in {guess_type('a.ncx'), guess_type('a.xml')}:
            pretty_xml_tree(container.parsed(name))
        else:
            # Not a file type that can be beautified
            continue
        container.dirty(name)
|
||||
|
||||
|
||||
|
@ -19,7 +19,7 @@ from calibre.ptempfile import PersistentTemporaryDirectory
|
||||
from calibre.ebooks.oeb.base import urlnormalize
|
||||
from calibre.ebooks.oeb.polish.main import SUPPORTED, tweak_polish
|
||||
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type
|
||||
from calibre.ebooks.oeb.polish.pretty import fix_all_html
|
||||
from calibre.ebooks.oeb.polish.pretty import fix_all_html, pretty_all
|
||||
from calibre.ebooks.oeb.polish.replace import rename_files
|
||||
from calibre.ebooks.oeb.polish.split import split, merge, AbortError
|
||||
from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog
|
||||
@ -133,7 +133,7 @@ class Boss(QObject):
|
||||
container = current_container()
|
||||
self.gui.file_list.build(container)
|
||||
self.update_global_history_actions()
|
||||
self.gui.action_save.setEnabled(True)
|
||||
self.set_modified()
|
||||
self.update_editors_from_container()
|
||||
|
||||
def delete_requested(self, spine_items, other_items):
|
||||
@ -144,7 +144,7 @@ class Boss(QObject):
|
||||
c.remove_from_spine(spine_items)
|
||||
for name in other_items:
|
||||
c.remove_item(name)
|
||||
self.gui.action_save.setEnabled(True)
|
||||
self.set_modified()
|
||||
self.gui.file_list.delete_done(spine_items, other_items)
|
||||
for name in list(spine_items) + list(other_items):
|
||||
if name in editors:
|
||||
@ -166,7 +166,7 @@ class Boss(QObject):
|
||||
self.add_savepoint(_('Re-order text'))
|
||||
c = current_container()
|
||||
c.set_spine(items)
|
||||
self.gui.action_save.setEnabled(True)
|
||||
self.set_modified()
|
||||
self.gui.file_list.build(current_container()) # needed as the linear flag may have changed on some items
|
||||
if c.opf_name in editors:
|
||||
editors[c.opf_name].replace_data(c.raw_data(c.opf_name))
|
||||
@ -267,7 +267,7 @@ class Boss(QObject):
|
||||
_('Failed to rename files, click Show details for more information.'),
|
||||
det_msg=job.traceback, show=True)
|
||||
self.gui.file_list.build(current_container())
|
||||
self.gui.action_save.setEnabled(True)
|
||||
self.set_modified()
|
||||
if oldname in editors:
|
||||
editors[newname] = editors.pop(oldname)
|
||||
self.gui.central.rename_editor(editors[newname], newname)
|
||||
@ -307,6 +307,9 @@ class Boss(QObject):
|
||||
self.update_global_history_actions()
|
||||
# }}}
|
||||
|
||||
def set_modified(self):
    ''' Enable the save action of the main window. '''
    save_action = self.gui.action_save
    save_action.setEnabled(True)
|
||||
|
||||
def fix_html(self, current):
|
||||
if current:
|
||||
ed = self.gui.central.current_editor
|
||||
@ -318,6 +321,22 @@ class Boss(QObject):
|
||||
self.add_savepoint(_('Fix HTML'))
|
||||
fix_all_html(current_container())
|
||||
self.update_editors_from_container()
|
||||
self.set_modified()
|
||||
|
||||
def pretty_print(self, current):
    ''' Beautify the file in the current editor if current is True,
    otherwise beautify all files in the book.

    Beautifying all files is skipped when self.check_dirtied() returns a
    false value. Otherwise a savepoint is added, pretty_all() is run on the
    current container, the editors are updated from the container and the
    book is marked as modified. '''
    if current:
        ed = self.gui.central.current_editor
        for name, x in editors.iteritems():
            if x is ed:
                # Only beautify when the current editor is found in editors.
                # Calling it after the loop would use an unbound or
                # unrelated name when there is no match.
                ed.pretty_print(name)
                break
    else:
        if not self.check_dirtied():
            return
        self.add_savepoint(_('Beautify files'))
        pretty_all(current_container())
        self.update_editors_from_container()
        self.set_modified()
|
||||
|
||||
def mark_selected_text(self):
|
||||
ed = self.gui.central.current_editor
|
||||
@ -658,7 +677,7 @@ class Boss(QObject):
|
||||
def editor_modification_state_changed(self, is_modified):
|
||||
self.apply_current_editor_state(update_keymap=False)
|
||||
if is_modified:
|
||||
actions['save-book'].setEnabled(True)
|
||||
self.set_modified()
|
||||
# }}}
|
||||
|
||||
def apply_current_editor_state(self, update_keymap=True):
|
||||
|
@ -61,6 +61,7 @@ class TextEdit(QPlainTextEdit):
|
||||
self.blockCountChanged[int].connect(self.update_line_number_area_width)
|
||||
self.updateRequest.connect(self.update_line_number_area)
|
||||
self.line_number_area = LineNumbers(self)
|
||||
self.syntax = None
|
||||
|
||||
@dynamic_property
|
||||
def is_modified(self):
|
||||
@ -164,10 +165,6 @@ class TextEdit(QPlainTextEdit):
|
||||
sel.append(self.current_search_mark)
|
||||
self.setExtraSelections(sel)
|
||||
|
||||
def fix_html(self):
|
||||
from calibre.ebooks.oeb.polish.pretty import fix_html
|
||||
self.replace_text(fix_html(unicode(self.toPlainText())).decode('utf-8'))
|
||||
|
||||
# Search and replace {{{
|
||||
def mark_selected_text(self):
|
||||
sel = QTextEdit.ExtraSelection()
|
||||
|
@ -10,7 +10,7 @@ from PyQt4.Qt import QMainWindow, Qt, QApplication, pyqtSignal
|
||||
|
||||
from calibre import xml_replace_entities
|
||||
from calibre.gui2 import error_dialog
|
||||
from calibre.gui2.tweak_book import actions
|
||||
from calibre.gui2.tweak_book import actions, current_container
|
||||
from calibre.gui2.tweak_book.editor.text import TextEdit
|
||||
|
||||
class Editor(QMainWindow):
|
||||
@ -133,6 +133,8 @@ class Editor(QMainWindow):
|
||||
self.tools_bar = b = self.addToolBar(_('Editor tools'))
|
||||
if self.syntax == 'html':
|
||||
b.addAction(actions['fix-html-current'])
|
||||
if self.syntax in {'xml', 'html', 'css'}:
|
||||
b.addAction(actions['pretty-current'])
|
||||
|
||||
def break_cycles(self):
|
||||
self.modification_state_changed.disconnect()
|
||||
@ -183,7 +185,18 @@ class Editor(QMainWindow):
|
||||
|
||||
def fix_html(self):
|
||||
if self.syntax == 'html':
|
||||
self.editor.fix_html()
|
||||
from calibre.ebooks.oeb.polish.pretty import fix_html
|
||||
self.editor.replace_text(fix_html(current_container(), unicode(self.editor.toPlainText())).decode('utf-8'))
|
||||
return True
|
||||
return False
|
||||
|
||||
def pretty_print(self, name):
    ''' Beautify the text in this editor, treating it as the file name from
    the current container. Returns True if the syntax of this editor is css,
    html or xml and the text was replaced, False otherwise. '''
    from calibre.ebooks.oeb.polish.pretty import pretty_html, pretty_css, pretty_xml
    if self.syntax not in {'css', 'html', 'xml'}:
        return False
    if self.syntax == 'css':
        func = pretty_css
    elif self.syntax == 'xml':
        func = pretty_xml
    else:
        func = pretty_html
    container = current_container()
    raw = unicode(self.editor.toPlainText())
    # The pretty_* functions return serialized data, which is decoded as UTF-8
    self.editor.replace_text(func(container, name, raw).decode('utf-8'))
    return True
|
||||
|
||||
def launch_editor(path_to_edit, path_is_raw=False, syntax='html'):
|
||||
if path_is_raw:
|
||||
|
@ -214,6 +214,10 @@ class Main(MainWindow):
|
||||
_('Fix HTML in the current file'))
|
||||
self.action_fix_html_all = reg('html-fix.png', _('&Fix HTML - all files'), partial(self.boss.fix_html, False), 'fix-html-all', (),
|
||||
_('Fix HTML in all files'))
|
||||
self.action_pretty_current = reg('format-justify-fill.png', _('&Beautify current file'), partial(self.boss.pretty_print, True), 'pretty-current', (),
|
||||
_('Beautify current file'))
|
||||
self.action_pretty_all = reg('format-justify-fill.png', _('&Beautify all files'), partial(self.boss.pretty_print, False), 'pretty-all', (),
|
||||
_('Beautify all files'))
|
||||
|
||||
# Polish actions
|
||||
group = _('Polish Book')
|
||||
@ -291,6 +295,7 @@ class Main(MainWindow):
|
||||
e.addAction(self.action_subset_fonts)
|
||||
e.addAction(self.action_smarten_punctuation)
|
||||
e.addAction(self.action_fix_html_all)
|
||||
e.addAction(self.action_pretty_all)
|
||||
|
||||
e = b.addMenu(_('&View'))
|
||||
t = e.addMenu(_('Tool&bars'))
|
||||
|
Loading…
x
Reference in New Issue
Block a user