Start work on HTML transform tool

This commit is contained in:
Kovid Goyal 2021-11-09 20:43:56 +05:30
parent 18dcc8734c
commit 8b85aa82d9
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 553 additions and 0 deletions

View File

@ -0,0 +1,123 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from calibre.utils.serialize import json_dumps, json_loads
class Action:
    """Static description of one transformation that can be applied to a tag.

    ``name`` is the identifier the action is registered under,
    ``short_text`` and ``long_text`` are its brief and detailed user visible
    descriptions and ``placeholder`` is the hint for the data the action
    takes (left empty for actions that take no data).
    """

    def __init__(self, name, short_text, long_text, placeholder=''):
        self.name, self.short_text = name, short_text
        self.long_text, self.placeholder = long_text, placeholder
# Every known action, keyed by its name. The mapping preserves the order in
# which the actions are declared below.
ACTION_MAP = {
    action.name: action for action in (
        Action(
            'rename',
            short_text=_('Change tag name'),
            long_text=_('Rename tag to the specified name'),
            placeholder=_('New tag name')),
        Action(
            'remove',
            short_text=_('Remove tag and children'),
            long_text=_('Remove the tag and all its contents')),
        Action(
            'unwrap',
            short_text=_('Remove tag only'),
            long_text=_('Remove the tag but keep its contents')),
        Action(
            'add_classes',
            short_text=_('Add classes'),
            long_text=_('Add the specified classes, for e.g.:') + ' bold green',
            placeholder=_('Space separated class names')),
        Action(
            'remove_classes',
            short_text=_('Remove classes'),
            long_text=_('Remove the specified classes, for e.g:') + ' bold green',
            placeholder=_('Space separated class names')),
        Action(
            'wrap',
            short_text=_('Wrap the tag'),
            long_text=_(
                'Wrap the tag in the specified tag, for example: {0} will wrap the tag in a DIV tag with class {1}').format(
                    '&lt;div class="box"&gt;', 'box'),
            placeholder=_('A HTML opening tag')),
        Action(
            'remove_attrs',
            short_text=_('Remove attributes'),
            long_text=_(
                'Remove the specified attributes from the tag. Multiple attribute names should be separated by spaces'),
            placeholder=_('Space separated attribute names')),
        Action(
            'add_attrs',
            short_text=_('Add attributes'),
            long_text=_('Add the specified attributes, for e.g.:') + ' class="red" name="test"',
            placeholder=_('Space separated attribute names')),
        Action(
            'empty',
            short_text=_('Empty the tag'),
            long_text=_('Remove all contents from the tag')),
        Action(
            'insert',
            short_text=_('Insert HTML at start'),
            long_text=_(
                'The specified HTML snippet is inserted after the opening tag. Note that only valid HTML snippets can be used without unclosed tags'),
            placeholder=_('HTML snippet')),
        Action(
            'insert_end',
            short_text=_('Insert HTML at end'),
            long_text=_(
                'The specified HTML snippet is inserted before the closing tag. Note that only valid HTML snippets can be used without unclosed tags'),
            placeholder=_('HTML snippet')),
        Action(
            'prepend',
            short_text=_('Insert HTML before tag'),
            long_text=_(
                'The specified HTML snippet is inserted before the opening tag. Note that only valid HTML snippets can be used without unclosed tags'),
            placeholder=_('HTML snippet')),
        Action(
            'append',
            short_text=_('Insert HTML after tag'),
            long_text=_(
                'The specified HTML snippet is inserted after the closing tag. Note that only valid HTML snippets can be used without unclosed tags'),
            placeholder=_('HTML snippet')),
    )
}
class Match:
    """Static description of one way of selecting the tags a rule applies to.

    ``name`` is the identifier the match type is registered under, ``text``
    is its user visible label and ``placeholder`` is the hint describing the
    query the match type expects (empty when there is none).
    """

    def __init__(self, name, text, placeholder=''):
        self.name, self.text = name, text
        self.placeholder = placeholder
# Every known match type, keyed by its name, in declaration order.
MATCH_TYPE_MAP = {
    match.name: match for match in (
        Match('is', text=_('is'), placeholder=_('Tag name')),
        Match('has_class', text=_('has class'), placeholder=_('Class name')),
        Match('not_has_class', text=_('does not have class'), placeholder=_('Class name')),
        Match('css', text=_('matches CSS selector'), placeholder=_('CSS selector')),
        Match('xpath', text=_('matches XPath selector'), placeholder=_('XPath selector')),
        # The wildcard takes no query, hence no placeholder
        Match('*', text=_('is any tag')),
    )
}
# Keys permitted in a serialized rule.
# NOTE(review): the rule dictionaries used elsewhere in this module carry an
# 'actions' list rather than 'property'/'action'/'action_data' -- confirm
# whether this set is still the intended one.
allowed_keys = frozenset((
    'property', 'match_type', 'query', 'action', 'action_data',
))
def validate_rule(rule):
    """Check *rule* and report any problem as a ``(title, message)`` pair.

    No checks are implemented yet, so every rule is accepted and the pair
    ``(None, None)`` is returned.
    """
    title = message = None
    return title, message
def compile_rules(serialized_rules):
    """Convert serialized rules into their executable form.

    Not implemented yet: always raises NotImplementedError.
    """
    reason = 'TODO: Implement this'
    raise NotImplementedError(reason)
def transform_container(container, serialized_rules, names=()):
    """Apply the serialized rules to the files of *container*.

    Not implemented yet. The rules are compiled first, so any error raised
    by compile_rules() propagates; otherwise NotImplementedError is raised.
    *container* and *names* are not used so far.
    """
    compiled_rules = compile_rules(serialized_rules)
    compiled_rules  # placeholder reference until the transform is written
    raise NotImplementedError('TODO: Implement this')
def rule_to_text(rule):
    """Return a human readable, multi-line description of *rule*.

    The first line describes which tags the rule matches; it is followed by
    one line per action, in order.

    :param rule: mapping with a ``match_type`` key, an optional ``query`` and
        an ``actions`` list whose entries have a ``type`` and optional ``data``
    :raises KeyError: if ``match_type`` or ``actions`` is missing, or names an
        unknown match type or action type
    """
    # Match objects expose their label as ``text``; only Action objects
    # have a ``short_text`` attribute.
    match_label = MATCH_TYPE_MAP[rule['match_type']].text
    lines = [_('If the tag {match_type} {query}').format(
        match_type=match_label, query=rule.get('query') or '')]
    for action in rule['actions']:
        lines.append(_('{action_type} {action_data}').format(
            action_type=ACTION_MAP[action['type']].short_text,
            # Actions that take no data may be stored without any
            action_data=action.get('data') or ''))
    return '\n'.join(lines)
def export_rules(serialized_rules):
    """Serialize the rules with json_dumps(), indented by two and with sorted keys."""
    formatting = {'indent': 2, 'sort_keys': True}
    return json_dumps(serialized_rules, **formatting)
def import_rules(raw_data):
    """Decode *raw_data* with json_loads() and return the result unchanged."""
    decoded = json_loads(raw_data)
    return decoded
def test(return_tests=False):  # {{{
    """Build the unit tests for this module.

    :param return_tests: when true the test suite is returned instead of
        being run
    :return: the suite if *return_tests* is true, otherwise None after the
        tests have been run with a text runner
    """
    import unittest

    class TestTransforms(unittest.TestCase):

        longMessage = True
        maxDiff = None
        ae = unittest.TestCase.assertEqual

        def test_matching(self):
            pass

        def test_export_import(self):
            rule = {'property':'a', 'match_type':'*', 'query':'some text', 'action':'remove', 'action_data':'color: red; a: b'}
            # import_rules() hands back the decoded JSON value rather than an
            # iterator, so compare the complete round-tripped list instead
            # of calling next() on it.
            self.ae([rule], import_rules(export_rules([rule])))

    tests = unittest.defaultTestLoader.loadTestsFromTestCase(TestTransforms)
    if return_tests:
        return tests
    unittest.TextTestRunner(verbosity=4).run(tests)


if __name__ == '__main__':
    test()
# }}}

View File

@ -0,0 +1,430 @@
#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
from qt.core import (
QComboBox, QDialogButtonBox, QFrame, QHBoxLayout, QIcon, QLabel, QLineEdit,
QMenu, QPushButton, QScrollArea, QSize, Qt, QTextCursor, QToolButton,
QVBoxLayout, QWidget, pyqtSignal
)
from calibre.ebooks.html_transform_rules import (
ACTION_MAP, MATCH_TYPE_MAP, compile_rules, export_rules, import_rules,
validate_rule
)
from calibre.gui2 import choose_files, choose_save_file, elided_text, error_dialog
from calibre.gui2.tag_mapper import (
RuleEditDialog as RuleEditDialogBase, RuleItem as RuleItemBase,
Rules as RulesBase, RulesDialog as RulesDialogBase, SaveLoadMixin
)
from calibre.gui2.widgets2 import Dialog
from calibre.utils.config import JSONConfig
from calibre.utils.localization import localize_user_manual_link
class TagAction(QWidget):
    """Editor for a single action: a chooser for its type plus a data field.

    The ``remove_action`` signal is emitted with this widget as its argument
    when the remove button is clicked.
    """

    remove_action = pyqtSignal(object)

    def __init__(self, parent=None):
        super().__init__(parent)
        self.l = layout = QVBoxLayout(self)
        self.h = controls_row = QHBoxLayout()
        layout.addLayout(controls_row)

        # Lay the controls out in the order given by the translated
        # sentence, falling back to English if the translation does not
        # contain exactly the expected placeholders.
        english_sentence = '{action_type} {action_data}'
        sentence = _('{action_type} {action_data}')
        if set(sentence.split()) != set(english_sentence.split()):
            sentence = english_sentence
        parts = sentence.split()
        final_clause = parts[-1]
        for clause in parts:
            if clause == '{action_data}':
                self.action_data = w = QLineEdit(self)
                w.setClearButtonEnabled(True)
            elif clause == '{action_type}':
                self.action_type = w = QComboBox(self)
                for name, action in ACTION_MAP.items():
                    w.addItem(action.short_text, name)
                w.currentIndexChanged.connect(self.update_state)
            controls_row.addWidget(w)
            if clause is not final_clause:
                controls_row.addWidget(QLabel('\xa0'))

        self.h2 = description_row = QHBoxLayout()
        layout.addLayout(description_row)

        self.remove_button = remove = QToolButton(self)
        remove.setToolTip(_('Remove this action'))
        remove.setIcon(QIcon(I('minus.png')))
        remove.clicked.connect(self.request_remove)
        description_row.addWidget(remove)

        self.action_desc = description = QLabel('')
        description.setWordWrap(True)
        description.setTextFormat(Qt.TextFormat.RichText)
        description_row.addWidget(description)

        self.sep = separator = QFrame(self)
        separator.setFrameShape(QFrame.Shape.HLine)
        layout.addWidget(separator)
        self.update_state()

    def request_remove(self):
        """Ask whoever is listening to remove this action."""
        self.remove_action.emit(self)

    @property
    def as_dict(self):
        """The action as a dictionary with ``type`` and ``data`` keys."""
        return {
            'type': self.action_type.currentData(),
            'data': self.action_data.text(),
        }

    @as_dict.setter
    def as_dict(self, val):
        self.action_data.setText(val.get('data') or '')
        action_type = val.get('type')
        # A missing or unknown type leaves the current selection untouched
        idx = self.action_type.findData(action_type) if action_type else -1
        if idx > -1:
            self.action_type.setCurrentIndex(idx)

    def update_state(self):
        """Sync the description and the data field with the chosen action type."""
        action = ACTION_MAP[self.as_dict['type']]
        self.action_desc.setText(action.long_text)
        takes_data = bool(action.placeholder)
        self.action_data.setVisible(takes_data)
        if takes_data:
            self.action_data.setPlaceholderText(action.placeholder)
class ActionsContainer(QScrollArea):
    """Scrollable, vertically stacked list of TagAction editors.

    A freshly created container holds one action.
    """

    def __init__(self, parent=None):
        super().__init__(parent)
        self.setWidgetResizable(True)
        self.w = content = QWidget()
        self.setWidget(content)
        content.l = QVBoxLayout(content)
        # Trailing stretch; action widgets are always inserted before it
        content.l.addStretch(1)
        self.all_actions = []
        self.new_action()

    def new_action(self):
        """Append a new action editor, give it focus and return it."""
        editor = TagAction(self)
        self.all_actions.append(editor)
        editor.remove_action.connect(self.remove_action)
        layout = self.w.l
        layout.insertWidget(layout.count() - 1, editor)
        editor.action_type.setFocus(Qt.FocusReason.OtherFocusReason)
        return editor

    def remove_action(self, ac):
        """Remove the action editor *ac*, if it belongs to this container."""
        if ac not in self.all_actions:
            return
        self.w.l.removeWidget(ac)
        self.all_actions.remove(ac)
        ac.deleteLater()

    def sizeHint(self):
        hint = super().sizeHint()
        hint.setHeight(hint.height() + 200)
        return hint

    @property
    def as_list(self):
        """The actions, in display order, as a list of dictionaries."""
        return [editor.as_dict for editor in self.all_actions]

    @as_list.setter
    def as_list(self, val):
        # Iterate over a snapshot as remove_action() mutates all_actions
        for existing in tuple(self.all_actions):
            self.remove_action(existing)
        for entry in val:
            editor = self.new_action()
            editor.as_dict = entry
class RuleEdit(QWidget):  # {{{
    """Editor for a single HTML transform rule.

    The rule consists of a match type, an optional query and a list of
    actions. It is read and written as a dictionary via the ``rule``
    property.
    """

    MSG = _('Create the rule to transform HTML tags below')

    def __init__(self, parent=None):
        QWidget.__init__(self, parent)
        self.l = l = QVBoxLayout(self)
        self.h = h = QHBoxLayout()

        self.la = la = QLabel(self.MSG)
        la.setWordWrap(True)
        l.addWidget(la)
        l.addLayout(h)
        # The placeholders of the English sentence must be the same as those
        # of the translatable one, otherwise the check below can never
        # succeed and the translated clause order would never be used.
        english_sentence = '{preamble} {match_type} {query}'
        sentence = _('{preamble} {match_type} {query}')
        if set(sentence.split()) != set(english_sentence.split()):
            sentence = english_sentence
        # The query editor lives in its own row below, so only the other
        # clauses are laid out here.
        parts = [clause for clause in sentence.split() if clause != '{query}']
        for i, clause in enumerate(parts):
            if clause == '{preamble}':
                self.preamble = w = QLabel(_('If the tag'))
            elif clause == '{match_type}':
                self.match_type = w = QComboBox(self)
                for action, m in MATCH_TYPE_MAP.items():
                    w.addItem(m.text, action)
                w.currentIndexChanged.connect(self.update_state)
            h.addWidget(w)
            if i < len(parts) - 1:
                h.addWidget(QLabel('\xa0'))
        h.addStretch(1)

        self.hl = h = QHBoxLayout()
        l.addLayout(h)
        self.query = q = QLineEdit(self)
        q.setClearButtonEnabled(True)
        h.addWidget(q)
        h.addSpacing(20)
        self.query_help_label = la = QLabel(self)
        la.setOpenExternalLinks(True)
        h.addWidget(la)

        self.thenl = QLabel(_('Then:'))
        l.addWidget(self.thenl)
        self.actions = a = ActionsContainer(self)
        l.addWidget(a)
        self.add_button = b = QPushButton(QIcon(I('plus.png')), _('Add another action'))
        b.clicked.connect(self.actions.new_action)
        l.addWidget(b)
        self.update_state()

    def sizeHint(self):
        a = QWidget.sizeHint(self)
        a.setHeight(a.height() + 375)
        a.setWidth(a.width() + 125)
        return a

    def update_state(self):
        """Sync the query field and its help link with the chosen match type."""
        mt = self.rule['match_type']
        self.query.setVisible(mt != '*')
        self.query.setPlaceholderText(MATCH_TYPE_MAP[mt].placeholder)
        needs_help = mt in ('css', 'xpath')
        self.query_help_label.setVisible(needs_help)
        # Decide based on the match type rather than on isVisible(): a
        # widget only reports itself visible once all its ancestors are
        # shown, which is not yet the case while a rule is being loaded
        # into an editor that has not been displayed.
        if needs_help:
            if mt == 'css':
                url = 'https://developer.mozilla.org/en-US/docs/Learn/CSS/Building_blocks/Selectors'
                text = _('CSS Selector help')
            else:
                url = localize_user_manual_link('https://manual.calibre-ebook.com/xpath.html')
                text = _('XPath selector help')
            self.query_help_label.setText(f'<a href="{url}">{text}</a>')

    @property
    def rule(self):
        """The rule being edited, as a dictionary with the keys
        ``match_type``, ``query`` and ``actions``."""
        return {
            'match_type': self.match_type.currentData(),
            'query': self.query.text().strip(),
            'actions': self.actions.as_list,
        }

    @rule.setter
    def rule(self, rule):
        # An unknown or missing match type selects the first entry
        idx = self.match_type.findData(str(rule.get('match_type', '')))
        self.match_type.setCurrentIndex(max(0, idx))
        self.query.setText(str(rule.get('query', '')).strip())
        self.actions.as_list = rule.get('actions') or []
        self.update_state()

    def validate(self):
        """Return True if the rule is valid, otherwise show an error dialog
        and return False."""
        title, msg = validate_rule(self.rule)
        if msg is not None and title is not None:
            error_dialog(self, title, msg, show=True)
            return False
        return True
# }}}
class RuleEditDialog(RuleEditDialogBase):  # {{{
    """Dialog for editing one rule, using RuleEdit as the editor widget."""

    RuleEditClass = RuleEdit
    DIALOG_TITLE = _('Edit rule')
    # NOTE(review): this name mentions CSS although the rules edited here
    # are HTML transform rules -- confirm that is intended.
    PREFS_NAME = 'edit-css-transform-rule'
# }}}
class RuleItem(RuleItemBase):  # {{{
    """List entry that displays a summary of one HTML transform rule."""

    @staticmethod
    def text_from_rule(rule, parent):
        """Return rich text summarising *rule*.

        The rule is expected in the form produced by ``RuleEdit.rule``: a
        ``match_type``, an optional ``query`` and an ``actions`` list whose
        entries have a ``type`` and optional ``data``. Text supplied by the
        user is elided using the font of *parent* and HTML escaped, since it
        can contain markup of its own. If the rule cannot be rendered, the
        error is printed and a message asking for the rule to be removed is
        returned instead.
        """
        from html import escape

        def user_text(value):
            return escape(elided_text(value or '', font=parent.font(), width=200, pos='right'))

        try:
            lines = [_('If the tag {match_type} {query}').format(
                match_type='<b>%s</b>' % escape(MATCH_TYPE_MAP[rule['match_type']].text),
                query='<b>%s</b>' % user_text(rule.get('query')))]
            for action in rule['actions']:
                data = action.get('data')
                lines.append(_('{action_type} {action_data}').format(
                    action_type=escape(ACTION_MAP[action['type']].short_text),
                    action_data=('<code>%s</code>' % user_text(data)) if data else ''))
            text = '<br>'.join(lines)
        except Exception:
            import traceback
            traceback.print_exc()
            text = _('This rule is invalid, please remove it')
        return text
# }}}
class Rules(RulesBase):  # {{{
    """Rule list that uses this module's item and edit dialog classes."""

    RuleEditDialogClass = RuleEditDialog
    RuleItemClass = RuleItem
    MSG = _('You can specify rules to transform styles here. Click the "Add rule" button'
            ' below to get started.')
# }}}
class Tester(Dialog):  # {{{
    """Dialog for trying out a set of rules on text typed in by the user.

    The rules are compiled on construction; running the test itself is not
    implemented yet.
    """

    DIALOG_TITLE = _('Test style transform rules')
    PREFS_NAME = 'test-style-transform-rules'
    LABEL = _('Enter a CSS stylesheet below to test')

    def __init__(self, rules, parent=None):
        # Must be set before Dialog.__init__() is called
        self.rules = compile_rules(rules)
        Dialog.__init__(self, self.DIALOG_TITLE, self.PREFS_NAME, parent=parent)

    def setup_ui(self):
        from calibre.gui2.tweak_book.editor.text import TextEdit

        self.l = layout = QVBoxLayout(self)
        self.bb.setStandardButtons(QDialogButtonBox.StandardButton.Close)

        self.la = label = QLabel(self.LABEL)
        layout.addWidget(label)

        self.css = editor = TextEdit(self)
        editor.load_text('/* %s */\n' % _('Enter CSS rules below and click the "Test" button'), 'css')
        label.setBuddy(editor)
        # Start with the cursor after the initial comment
        cursor = editor.textCursor()
        cursor.movePosition(QTextCursor.MoveOperation.End)
        editor.setTextCursor(cursor)

        self.h = row = QHBoxLayout()
        layout.addLayout(row)
        row.addWidget(editor)
        self.test_button = button = QPushButton(_('&Test'), self)
        button.clicked.connect(self.do_test)
        row.addWidget(button)

        self.result = output = TextEdit(self)
        output.setReadOnly(True)
        layout.addWidget(output)
        layout.addWidget(self.bb)

    @property
    def value(self):
        """The text currently in the input editor."""
        return self.css.toPlainText()

    def do_test(self):
        """Not implemented yet: always raises NotImplementedError."""
        raise NotImplementedError('TODO: Implement this')

    def sizeHint(self):
        return QSize(800, 600)
# }}}
class RulesDialog(RulesDialogBase):  # {{{
    """Dialog for editing a whole set of rules."""

    RulesClass = Rules
    TesterClass = Tester
    DIALOG_TITLE = _('Edit style transform rules')
    PREFS_NAME = 'edit-style-transform-rules'

    def __init__(self, *args, **kwargs):
        # The configuration object is created per instance rather than once
        # on the class, as it can be shared by multiple processes.
        # NOTE(review): the store is named after style transform rules --
        # confirm this is the intended store for HTML transform rules.
        self.PREFS_OBJECT = JSONConfig('style-transform-rules')
        RulesDialogBase.__init__(self, *args, **kwargs)
# }}}
class RulesWidget(QWidget, SaveLoadMixin):  # {{{
    """Embeddable widget for editing, testing, exporting/importing and
    saving/loading a set of rules.

    The ``changed`` signal is emitted when the rules are edited, imported or
    loaded.
    """

    changed = pyqtSignal()

    def __init__(self, parent=None):
        self.loaded_ruleset = None
        QWidget.__init__(self, parent)
        self.PREFS_OBJECT = JSONConfig('style-transform-rules')
        l = QVBoxLayout(self)
        self.rules_widget = w = Rules(self)
        w.changed.connect(self.changed.emit)
        l.addWidget(w)
        self.h = h = QHBoxLayout()
        l.addLayout(h)
        self.export_button = b = QPushButton(_('E&xport'), self)
        b.setToolTip(_('Export these rules to a file'))
        b.clicked.connect(self.export_rules)
        h.addWidget(b)
        self.import_button = b = QPushButton(_('&Import'), self)
        b.setToolTip(_('Import previously exported rules'))
        b.clicked.connect(self.import_rules)
        h.addWidget(b)
        self.test_button = b = QPushButton(_('&Test rules'), self)
        b.clicked.connect(self.test_rules)
        h.addWidget(b)
        h.addStretch(10)
        self.save_button = b = QPushButton(_('&Save'), self)
        b.setToolTip(_('Save this ruleset for later re-use'))
        b.clicked.connect(self.save_ruleset)
        h.addWidget(b)
        # Kept under its own name so that export_button continues to refer
        # to the export button created above
        self.load_button = b = QPushButton(_('&Load'), self)
        self.load_menu = QMenu(self)
        b.setMenu(self.load_menu)
        b.setToolTip(_('Load a previously saved ruleset'))
        b.clicked.connect(self.load_ruleset)
        h.addWidget(b)
        self.build_load_menu()

    def export_rules(self):
        """Write the current rules to a file chosen by the user.

        Shows an error dialog instead if there are no rules.
        """
        rules = self.rules_widget.rules
        if not rules:
            return error_dialog(self, _('No rules'), _(
                'There are no rules to export'), show=True)
        path = choose_save_file(self, 'export-style-transform-rules', _('Choose file for exported rules'), initial_filename='rules.txt')
        if path:
            raw = export_rules(rules)
            with open(path, 'wb') as f:
                f.write(raw)

    def import_rules(self):
        """Read rules from a file chosen by the user and place them before
        the existing rules."""
        paths = choose_files(self, 'export-style-transform-rules', _('Choose file to import rules from'), select_only_single_file=True)
        if paths:
            with open(paths[0], 'rb') as f:
                rules = import_rules(f.read())
            self.rules_widget.rules = list(rules) + list(self.rules_widget.rules)
            self.changed.emit()

    def load_ruleset(self, name):
        SaveLoadMixin.load_ruleset(self, name)
        self.changed.emit()

    def test_rules(self):
        """Open the tester dialog for the current rules."""
        Tester(self.rules_widget.rules, self).exec_()

    @property
    def rules(self):
        """The rules currently held by the embedded rules widget."""
        return self.rules_widget.rules

    @rules.setter
    def rules(self, val):
        try:
            self.rules_widget.rules = val or []
        except Exception:
            # Best effort: report the failure and start with no rules
            import traceback
            traceback.print_exc()
            self.rules_widget.rules = []
# }}}
if __name__ == '__main__':
    # Manual check: show a stand-alone rule editor
    from calibre.gui2 import Application
    app = Application([])
    v = RuleEdit()
    v.setWindowFlag(Qt.WindowType.Dialog)
    v.show()
    app.exec_()
    # Alternative manual check using the complete rules dialog:
    # d = RulesDialog()
    # d.rules = [
    #     {'match_type':'*', 'query':'', 'action':'change', 'action_data':'green'},
    # ]
    # d.exec_()
    # from pprint import pprint
    # pprint(d.rules)
    # del d, app