Implement fixing HTML to create well-formed markup

This commit is contained in:
Kovid Goyal 2013-11-25 14:22:47 +05:30
parent 01f313af1f
commit 9e9ea5178f
5 changed files with 52 additions and 0 deletions

View File

@ -0,0 +1,22 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
from calibre.ebooks.oeb.polish.parsing import parse
from calibre.ebooks.oeb.base import serialize, OEB_DOCS
def fix_html(raw):
    '''
    Round-trip ``raw`` through the parser and the serializer.

    :param raw: The markup to fix; it is handed unchanged to ``parse()``.
    :return: The output of ``serialize(root, 'text/html')`` decoded from
        UTF-8, i.e. text rather than encoded bytes.
    '''
    serialized = serialize(parse(raw), 'text/html')
    return serialized.decode('utf-8')
def fix_all_html(container):
    '''
    Parse every OEB document in ``container`` and mark it as dirty.

    Walks ``container.mime_map`` and, for each entry whose media type is in
    ``OEB_DOCS``, calls ``container.parsed(name)`` followed by
    ``container.dirty(name)``. Entries of any other media type are skipped.

    NOTE(review): presumably the container writes the re-serialized markup
    back out when dirty files are next committed -- confirm against the
    container implementation.

    :param container: Object exposing ``mime_map``, ``parsed()`` and
        ``dirty()``.
    '''
    for name, media_type in container.mime_map.iteritems():
        if media_type not in OEB_DOCS:
            continue
        container.parsed(name)
        container.dirty(name)

View File

@ -19,6 +19,7 @@ from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.base import urlnormalize
from calibre.ebooks.oeb.polish.main import SUPPORTED, tweak_polish from calibre.ebooks.oeb.polish.main import SUPPORTED, tweak_polish
from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type
from calibre.ebooks.oeb.polish.pretty import fix_all_html
from calibre.ebooks.oeb.polish.replace import rename_files from calibre.ebooks.oeb.polish.replace import rename_files
from calibre.ebooks.oeb.polish.split import split, merge, AbortError from calibre.ebooks.oeb.polish.split import split, merge, AbortError
from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog
@ -306,6 +307,18 @@ class Boss(QObject):
self.update_global_history_actions() self.update_global_history_actions()
# }}} # }}}
def fix_html(self, current):
    '''
    Fix the HTML of either the current editor or the whole book.

    :param current: If true, only the editor currently focused in the
        central widget is asked to fix its HTML, and only when it provides
        a ``fix_html`` method. If false, every HTML file in the current
        container is fixed via ``fix_all_html()``.
    '''
    if current:
        editor = self.gui.central.current_editor
        # hasattr() is also False when there is no current editor (None)
        if hasattr(editor, 'fix_html'):
            editor.fix_html()
        return

    # Whole-book mode: do nothing unless check_dirtied() gives the go-ahead
    if not self.check_dirtied():
        return
    self.add_savepoint(_('Fix HTML'))
    fix_all_html(current_container())
    self.update_editors_from_container()
def mark_selected_text(self): def mark_selected_text(self):
ed = self.gui.central.current_editor ed = self.gui.central.current_editor
if ed is not None: if ed is not None:
@ -657,6 +670,7 @@ class Boss(QObject):
actions['editor-cut'].setEnabled(ed.copy_available) actions['editor-cut'].setEnabled(ed.copy_available)
actions['editor-copy'].setEnabled(ed.cut_available) actions['editor-copy'].setEnabled(ed.cut_available)
actions['go-to-line-number'].setEnabled(ed.has_line_numbers) actions['go-to-line-number'].setEnabled(ed.has_line_numbers)
actions['fix-html-current'].setEnabled(ed.syntax == 'html')
self.gui.keyboard.set_mode(ed.syntax) self.gui.keyboard.set_mode(ed.syntax)
name = None name = None
for n, x in editors.iteritems(): for n, x in editors.iteritems():

View File

@ -164,6 +164,10 @@ class TextEdit(QPlainTextEdit):
sel.append(self.current_search_mark) sel.append(self.current_search_mark)
self.setExtraSelections(sel) self.setExtraSelections(sel)
def fix_html(self):
    '''
    Replace the text of this editor with a fixed-up version of itself.

    The current plain text is converted to ``unicode``, passed through
    ``calibre.ebooks.oeb.polish.pretty.fix_html()`` and the result is
    handed to ``self.replace_text()``.
    '''
    from calibre.ebooks.oeb.polish.pretty import fix_html
    # pretty.fix_html() already returns decoded text. Calling
    # .decode('utf-8') on it again would make Python 2 implicitly encode it
    # with the ASCII codec first, raising UnicodeEncodeError as soon as the
    # document contains a non-ASCII character.
    self.replace_text(fix_html(unicode(self.toPlainText())))
# Search and replace {{{ # Search and replace {{{
def mark_selected_text(self): def mark_selected_text(self):
sel = QTextEdit.ExtraSelection() sel = QTextEdit.ExtraSelection()

View File

@ -130,6 +130,9 @@ class Editor(QMainWindow):
b.addAction(actions['editor-%s' % x]) b.addAction(actions['editor-%s' % x])
except KeyError: except KeyError:
pass pass
self.tools_bar = b = self.addToolBar(_('Editor tools'))
if self.syntax == 'html':
b.addAction(actions['fix-html-current'])
def break_cycles(self): def break_cycles(self):
self.modification_state_changed.disconnect() self.modification_state_changed.disconnect()
@ -178,6 +181,10 @@ class Editor(QMainWindow):
def contextMenuEvent(self, ev): def contextMenuEvent(self, ev):
ev.ignore() ev.ignore()
def fix_html(self):
    '''
    Forward the fix-HTML request to the wrapped editor widget.

    Does nothing unless this editor's ``syntax`` is ``'html'``.
    '''
    if self.syntax != 'html':
        return
    self.editor.fix_html()
def launch_editor(path_to_edit, path_is_raw=False, syntax='html'): def launch_editor(path_to_edit, path_is_raw=False, syntax='html'):
if path_is_raw: if path_is_raw:
raw = path_to_edit raw = path_to_edit

View File

@ -210,6 +210,10 @@ class Main(MainWindow):
# Tool actions # Tool actions
group = _('Tools') group = _('Tools')
self.action_toc = reg('toc.png', _('&Edit Table of Contents'), self.boss.edit_toc, 'edit-toc', (), _('Edit Table of Contents')) self.action_toc = reg('toc.png', _('&Edit Table of Contents'), self.boss.edit_toc, 'edit-toc', (), _('Edit Table of Contents'))
self.action_fix_html_current = reg('html-fix.png', _('&Fix HTML'), partial(self.boss.fix_html, True), 'fix-html-current', (),
_('Fix HTML in the current file'))
self.action_fix_html_all = reg('html-fix.png', _('&Fix HTML - all files'), partial(self.boss.fix_html, False), 'fix-html-all', (),
_('Fix HTML in all files'))
# Polish actions # Polish actions
group = _('Polish Book') group = _('Polish Book')
@ -286,6 +290,7 @@ class Main(MainWindow):
e.addAction(self.action_embed_fonts) e.addAction(self.action_embed_fonts)
e.addAction(self.action_subset_fonts) e.addAction(self.action_subset_fonts)
e.addAction(self.action_smarten_punctuation) e.addAction(self.action_smarten_punctuation)
e.addAction(self.action_fix_html_all)
e = b.addMenu(_('&View')) e = b.addMenu(_('&View'))
t = e.addMenu(_('Tool&bars')) t = e.addMenu(_('Tool&bars'))