More HTML transform actions

This commit is contained in:
Kovid Goyal 2021-11-10 19:37:18 +05:30
parent dd2a97d226
commit 1097c34667
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -8,6 +8,7 @@ from html5_parser import parse
from calibre.ebooks.oeb.parse_utils import XHTML from calibre.ebooks.oeb.parse_utils import XHTML
from calibre.ebooks.oeb.base import OEB_DOCS, XPath from calibre.ebooks.oeb.base import OEB_DOCS, XPath
from calibre.ebooks.metadata.tag_mapper import uniq
from calibre.utils.serialize import json_dumps, json_loads from calibre.utils.serialize import json_dumps, json_loads
from css_selectors.select import Select, get_parsed_selector from css_selectors.select import Select, get_parsed_selector
@ -39,13 +40,13 @@ ACTION_MAP = {a.name: a for a in (
Action('unwrap', _('Remove tag only'), _('Remove the tag but keep its contents')), Action('unwrap', _('Remove tag only'), _('Remove the tag but keep its contents')),
Action('add_classes', _('Add classes'), _('Add the specified classes, for e.g.:') + ' bold green', _('Space separated class names')), Action('add_classes', _('Add classes'), _('Add the specified classes, for e.g.:') + ' bold green', _('Space separated class names')),
Action('remove_classes', _('Remove classes'), _('Remove the specified classes, for e.g:') + ' bold green', _('Space separated class names')), Action('remove_classes', _('Remove classes'), _('Remove the specified classes, for e.g:') + ' bold green', _('Space separated class names')),
Action('wrap', _('Wrap the tag'), _(
'Wrap the tag in the specified tag, for example: {0} will wrap the tag in a DIV tag with class {1}').format(
'<div class="box">', 'box'), _('An HTML opening tag')),
Action('remove_attrs', _('Remove attributes'), _( Action('remove_attrs', _('Remove attributes'), _(
'Remove the specified attributes from the tag. Multiple attribute names should be separated by spaces'), _('Space separated attribute names')), 'Remove the specified attributes from the tag. Multiple attribute names should be separated by spaces'), _('Space separated attribute names')),
Action('add_attrs', _('Add attributes'), _('Add the specified attributes, for e.g.:') + ' class="red" name="test"', _('Space separated attribute names')), Action('add_attrs', _('Add attributes'), _('Add the specified attributes, for e.g.:') + ' class="red" name="test"', _('Space separated attribute names')),
Action('empty', _('Empty the tag'), _('Remove all contents from the tag')), Action('empty', _('Empty the tag'), _('Remove all contents from the tag')),
Action('wrap', _('Wrap the tag'), _(
'Wrap the tag in the specified tag, for example: {0} will wrap the tag in a DIV tag with class {1}').format(
'<div class="box">', 'box'), _('An HTML opening tag')),
Action('insert', _('Insert HTML at start'), _( Action('insert', _('Insert HTML at start'), _(
'The specified HTML snippet is inserted after the opening tag. Note that only valid HTML snippets can be used without unclosed tags'), 'The specified HTML snippet is inserted after the opening tag. Note that only valid HTML snippets can be used without unclosed tags'),
_('HTML snippet')), _('HTML snippet')),
@ -180,10 +181,47 @@ def unwrap_tag(tag):
return True return True
def add_classes(classes, tag):
orig_cls = tag.get('class', '')
orig = list(filter(None, str.split(orig_cls)))
new_cls = ' '.join(uniq(orig + classes))
if new_cls != orig_cls:
tag.set('class', new_cls)
return True
return False
def remove_classes(classes, tag):
orig_cls = tag.get('class', '')
orig = list(filter(None, str.split(orig_cls)))
for x in classes:
while True:
try:
orig.remove(x)
except ValueError:
break
new_cls = ' '.join(orig)
if new_cls != orig_cls:
tag.set('class', new_cls)
return True
return False
def remove_attrs(attrs, tag):
changed = False
for a in attrs:
if tag.attrib.pop(a, None) is not None:
changed = True
return changed
action_map = { action_map = {
'rename': lambda data: partial(rename_tag, qualify_tag_name(data)), 'rename': lambda data: partial(rename_tag, qualify_tag_name(data)),
'remove': lambda data: remove_tag, 'remove': lambda data: remove_tag,
'unwrap': lambda data: unwrap_tag, 'unwrap': lambda data: unwrap_tag,
'add_classes': lambda data: partial(add_classes, str.split(data)),
'remove_classes': lambda data: partial(remove_classes, str.split(data)),
'remove_attrs': lambda data: partial(remove_attrs, str.split(data)),
} }
@ -375,6 +413,30 @@ def test(return_tests=False): # {{{
self.assertTrue(t('unwrap')(div[1])) self.assertTrue(t('unwrap')(div[1]))
ax(div, '<div><div/>text<span>unwrap</span>tail</div>') ax(div, '<div><div/>text<span>unwrap</span>tail</div>')
p = r()[0]
self.assertTrue(t('add_classes', 'a b')(p))
self.ae(p.get('class'), 'a b')
p = r('<p class="c a d">')[0]
self.assertTrue(t('add_classes', 'a b')(p))
self.ae(p.get('class'), 'c a d b')
p = r('<p class="c a d">')[0]
self.assertFalse(t('add_classes', 'a')(p))
self.ae(p.get('class'), 'c a d')
p = r()[0]
self.assertFalse(t('remove_classes', 'a b')(p))
self.ae(p.get('class'), None)
p = r('<p class="c a a d">')[0]
self.assertTrue(t('remove_classes', 'a')(p))
self.ae(p.get('class'), 'c d')
p = r()[0]
self.assertFalse(t('remove_attrs', 'a b')(p))
self.assertFalse(p.attrib)
p = r('<p class="c" x="y" id="p">')[0]
self.assertTrue(t('remove_attrs', 'class id')(p))
self.ae(list(p.attrib), ['x'])
tests = unittest.defaultTestLoader.loadTestsFromTestCase(TestTransforms) tests = unittest.defaultTestLoader.loadTestsFromTestCase(TestTransforms)
if return_tests: if return_tests:
return tests return tests