mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
More HTML transform actions
This commit is contained in:
parent
c0cd4f0e73
commit
0d0c9c3f02
@ -5,6 +5,7 @@
|
|||||||
|
|
||||||
from functools import partial
|
from functools import partial
|
||||||
from html5_parser import parse
|
from html5_parser import parse
|
||||||
|
from lxml import etree
|
||||||
|
|
||||||
from calibre.ebooks.oeb.parse_utils import XHTML
|
from calibre.ebooks.oeb.parse_utils import XHTML
|
||||||
from calibre.ebooks.oeb.base import OEB_DOCS, XPath
|
from calibre.ebooks.oeb.base import OEB_DOCS, XPath
|
||||||
@ -259,6 +260,47 @@ def wrap(data, tag):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def parse_html_snippet(text):
    """Parse *text* as an HTML fragment wrapped in a ``<div>``.

    The fragment is parsed in a ``div`` context with namespaced elements and
    the first node of the result is returned; callers use that node as the
    container whose text and children get inserted elsewhere.
    """
    wrapped = f'<div>{text}</div>'
    nodes = parse(wrapped, namespace_elements=True, fragment_context='div')
    return nodes[0]
|
||||||
|
|
||||||
|
|
||||||
|
def clone(src_element, target_tree):
    """Return a deep copy of *src_element* built for *target_tree*.

    Comments are recreated as comment nodes; every other node is created
    with ``target_tree.makeelement`` and receives the source's attributes.
    Text and tail are copied in both cases.

    Children are cloned recursively rather than handed over with
    ``extend(src_element)``: extending with the live children re-parents
    them, which would empty the source snippet after its first use and
    leave later insertions with a shallow copy.

    :param src_element: the node to copy; it is left unmodified
    :param target_tree: any element of the destination tree, used only as
        a factory via ``makeelement``
    :return: the newly created, unattached copy
    """
    if src_element.tag is etree.Comment:
        ans = etree.Comment('')
    else:
        ans = target_tree.makeelement(src_element.tag)
        for k, v in src_element.items():
            ans.set(k, v)
        # Copy the subtree instead of moving it out of the source.
        ans.extend([clone(child, target_tree) for child in src_element])
    ans.text = src_element.text
    ans.tail = src_element.tail
    return ans
|
||||||
|
|
||||||
|
|
||||||
|
def insert_snippet(container, before_children, tag):
    """Insert the contents of *container* into *tag*.

    :param container: element whose leading text and children form the
        snippet; the children are copied with ``clone``
    :param before_children: if true the snippet goes at the start of *tag*
        (before its existing text and children), otherwise at the end
    :param tag: the element that receives the snippet
    :return: always ``True``
    """
    if before_children:
        orig_text = tag.text
        tag.text = container.text
        if len(container):
            # Insert in reverse so each clone can go to index 0 and the
            # snippet's children still end up in their original order.
            for i, child in enumerate(reversed(container)):
                c = clone(child, tag)
                tag.insert(0, c)
                # i == 0 is the snippet's last child: the tag's original
                # leading text must now follow it.
                if i == 0 and orig_text:
                    c.tail = (c.tail or '') + orig_text
        elif orig_text:
            # Text-only snippet: prepend it to the existing text. Guarding
            # on orig_text avoids str + None when the tag had no text.
            tag.text = (tag.text or '') + orig_text
    else:
        if container.text:
            # The snippet's leading text goes after whatever currently
            # comes last in the tag.
            if len(tag) > 0:
                tag[-1].tail = (tag[-1].tail or '') + container.text
            else:
                tag.text = (tag.text or '') + container.text
        for child in container:
            c = clone(child, tag)
            tag.append(c)
    return True
|
||||||
|
|
||||||
|
|
||||||
action_map = {
|
action_map = {
|
||||||
'rename': lambda data: partial(rename_tag, qualify_tag_name(data)),
|
'rename': lambda data: partial(rename_tag, qualify_tag_name(data)),
|
||||||
'remove': lambda data: remove_tag,
|
'remove': lambda data: remove_tag,
|
||||||
@ -269,6 +311,8 @@ action_map = {
|
|||||||
'remove_attrs': lambda data: partial(remove_attrs, str.split(data)),
|
'remove_attrs': lambda data: partial(remove_attrs, str.split(data)),
|
||||||
'add_attrs': lambda data: partial(add_attrs, parse_attrs(data)),
|
'add_attrs': lambda data: partial(add_attrs, parse_attrs(data)),
|
||||||
'wrap': lambda data: partial(wrap, parse_start_tag(data)),
|
'wrap': lambda data: partial(wrap, parse_start_tag(data)),
|
||||||
|
'insert': lambda data: partial(insert_snippet, parse_html_snippet(data), True),
|
||||||
|
'insert_end': lambda data: partial(insert_snippet, parse_html_snippet(data), False),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -426,7 +470,6 @@ def test(return_tests=False): # {{{
|
|||||||
self.ae(rule, next(iter(import_rules(export_rules([rule])))))
|
self.ae(rule, next(iter(import_rules(export_rules([rule])))))
|
||||||
|
|
||||||
def test_html_transform_actions(self):
|
def test_html_transform_actions(self):
|
||||||
from lxml import etree
|
|
||||||
|
|
||||||
def r(html='<p>hello'):
|
def r(html='<p>hello'):
|
||||||
return parse(namespace_elements=True, html=html)[1]
|
return parse(namespace_elements=True, html=html)[1]
|
||||||
@ -502,6 +545,20 @@ def test(return_tests=False): # {{{
|
|||||||
self.assertTrue(t('wrap', '<div a=b c=d>')(p))
|
self.assertTrue(t('wrap', '<div a=b c=d>')(p))
|
||||||
ax(p.getparent(), '<div a="b" c="d"><p>t<span>s</span></p></div>tail')
|
ax(p.getparent(), '<div a="b" c="d"><p>t<span>s</span></p></div>tail')
|
||||||
|
|
||||||
|
p = r('<p>hello<span>s')[0]
|
||||||
|
self.assertTrue(t('insert', 'text<div a=b c=d><!-- comm -->tail')(p))
|
||||||
|
ax(p, '<p>text<div a="b" c="d"><!-- comm -->tail</div>hello<span>s</span></p>')
|
||||||
|
p = r('<p>hello<span>s')[0]
|
||||||
|
self.assertTrue(t('insert', 'text')(p))
|
||||||
|
ax(p, '<p>texthello<span>s</span></p>')
|
||||||
|
|
||||||
|
p = r('<p>hello<span>s')[0]
|
||||||
|
self.assertTrue(t('insert_end', 'text<div><!-- comm -->tail')(p))
|
||||||
|
ax(p, '<p>hello<span>s</span>text<div><!-- comm -->tail</div></p>')
|
||||||
|
p = r('<p>hello<span>s</span>tail')[0]
|
||||||
|
self.assertTrue(t('insert_end', 'text')(p))
|
||||||
|
ax(p, '<p>hello<span>s</span>tailtext</p>')
|
||||||
|
|
||||||
tests = unittest.defaultTestLoader.loadTestsFromTestCase(TestTransforms)
|
tests = unittest.defaultTestLoader.loadTestsFromTestCase(TestTransforms)
|
||||||
if return_tests:
|
if return_tests:
|
||||||
return tests
|
return tests
|
||||||
|
Loading…
x
Reference in New Issue
Block a user