From 9e9ea5178fe9c066e8124fc8385fbb3486a2c085 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 25 Nov 2013 14:22:47 +0530 Subject: [PATCH] Implement fixing html to create well formed markup --- src/calibre/ebooks/oeb/polish/pretty.py | 22 ++++++++++++++++++++ src/calibre/gui2/tweak_book/boss.py | 14 +++++++++++++ src/calibre/gui2/tweak_book/editor/text.py | 4 ++++ src/calibre/gui2/tweak_book/editor/widget.py | 7 +++++++ src/calibre/gui2/tweak_book/ui.py | 5 +++++ 5 files changed, 52 insertions(+) create mode 100644 src/calibre/ebooks/oeb/polish/pretty.py diff --git a/src/calibre/ebooks/oeb/polish/pretty.py b/src/calibre/ebooks/oeb/polish/pretty.py new file mode 100644 index 0000000000..9c2196ddc3 --- /dev/null +++ b/src/calibre/ebooks/oeb/polish/pretty.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +from __future__ import (unicode_literals, division, absolute_import, + print_function) + +__license__ = 'GPL v3' +__copyright__ = '2013, Kovid Goyal ' + + +from calibre.ebooks.oeb.polish.parsing import parse +from calibre.ebooks.oeb.base import serialize, OEB_DOCS + +def fix_html(raw): + root = parse(raw) + return serialize(root, 'text/html').decode('utf-8') + +def fix_all_html(container): + for name, mt in container.mime_map.iteritems(): + if mt in OEB_DOCS: + container.parsed(name) + container.dirty(name) + diff --git a/src/calibre/gui2/tweak_book/boss.py b/src/calibre/gui2/tweak_book/boss.py index db7743e855..969676c130 100644 --- a/src/calibre/gui2/tweak_book/boss.py +++ b/src/calibre/gui2/tweak_book/boss.py @@ -19,6 +19,7 @@ from calibre.ptempfile import PersistentTemporaryDirectory from calibre.ebooks.oeb.base import urlnormalize from calibre.ebooks.oeb.polish.main import SUPPORTED, tweak_polish from calibre.ebooks.oeb.polish.container import get_container as _gc, clone_container, guess_type +from calibre.ebooks.oeb.polish.pretty import fix_all_html from calibre.ebooks.oeb.polish.replace import rename_files from 
calibre.ebooks.oeb.polish.split import split, merge, AbortError from calibre.gui2 import error_dialog, choose_files, question_dialog, info_dialog @@ -306,6 +307,18 @@ class Boss(QObject): self.update_global_history_actions() # }}} + def fix_html(self, current): + if current: + ed = self.gui.central.current_editor + if hasattr(ed, 'fix_html'): + ed.fix_html() + else: + if not self.check_dirtied(): + return + self.add_savepoint(_('Fix HTML')) + fix_all_html(current_container()) + self.update_editors_from_container() + def mark_selected_text(self): ed = self.gui.central.current_editor if ed is not None: @@ -657,6 +670,7 @@ class Boss(QObject): actions['editor-cut'].setEnabled(ed.copy_available) actions['editor-copy'].setEnabled(ed.cut_available) actions['go-to-line-number'].setEnabled(ed.has_line_numbers) + actions['fix-html-current'].setEnabled(ed.syntax == 'html') self.gui.keyboard.set_mode(ed.syntax) name = None for n, x in editors.iteritems(): diff --git a/src/calibre/gui2/tweak_book/editor/text.py b/src/calibre/gui2/tweak_book/editor/text.py index 056579e6ac..9d728cdedb 100644 --- a/src/calibre/gui2/tweak_book/editor/text.py +++ b/src/calibre/gui2/tweak_book/editor/text.py @@ -164,6 +164,10 @@ class TextEdit(QPlainTextEdit): sel.append(self.current_search_mark) self.setExtraSelections(sel) + def fix_html(self): + from calibre.ebooks.oeb.polish.pretty import fix_html + self.replace_text(fix_html(unicode(self.toPlainText()))) + # Search and replace {{{ def mark_selected_text(self): sel = QTextEdit.ExtraSelection() diff --git a/src/calibre/gui2/tweak_book/editor/widget.py b/src/calibre/gui2/tweak_book/editor/widget.py index 794d9ddc24..76cbf6b8e7 100644 --- a/src/calibre/gui2/tweak_book/editor/widget.py +++ b/src/calibre/gui2/tweak_book/editor/widget.py @@ -130,6 +130,9 @@ class Editor(QMainWindow): b.addAction(actions['editor-%s' % x]) except KeyError: pass + self.tools_bar = b = self.addToolBar(_('Editor tools')) + if self.syntax == 'html': + 
b.addAction(actions['fix-html-current']) def break_cycles(self): self.modification_state_changed.disconnect() @@ -178,6 +181,10 @@ class Editor(QMainWindow): def contextMenuEvent(self, ev): ev.ignore() + def fix_html(self): + if self.syntax == 'html': + self.editor.fix_html() + def launch_editor(path_to_edit, path_is_raw=False, syntax='html'): if path_is_raw: raw = path_to_edit diff --git a/src/calibre/gui2/tweak_book/ui.py b/src/calibre/gui2/tweak_book/ui.py index bf8c78cc22..d623cfbb7a 100644 --- a/src/calibre/gui2/tweak_book/ui.py +++ b/src/calibre/gui2/tweak_book/ui.py @@ -210,6 +210,10 @@ class Main(MainWindow): # Tool actions group = _('Tools') self.action_toc = reg('toc.png', _('&Edit Table of Contents'), self.boss.edit_toc, 'edit-toc', (), _('Edit Table of Contents')) + self.action_fix_html_current = reg('html-fix.png', _('&Fix HTML'), partial(self.boss.fix_html, True), 'fix-html-current', (), + _('Fix HTML in the current file')) + self.action_fix_html_all = reg('html-fix.png', _('&Fix HTML - all files'), partial(self.boss.fix_html, False), 'fix-html-all', (), + _('Fix HTML in all files')) # Polish actions group = _('Polish Book') @@ -286,6 +290,7 @@ class Main(MainWindow): e.addAction(self.action_embed_fonts) e.addAction(self.action_subset_fonts) e.addAction(self.action_smarten_punctuation) + e.addAction(self.action_fix_html_all) e = b.addMenu(_('&View')) t = e.addMenu(_('Tool&bars'))