mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Implement matching for HTML transform rules
This commit is contained in:
parent
4f3c5be22d
commit
2a9caad4d3
@ -5,9 +5,9 @@
|
|||||||
|
|
||||||
from functools import partial
|
from functools import partial
|
||||||
|
|
||||||
|
from calibre.ebooks.oeb.base import OEB_DOCS, XPath
|
||||||
from calibre.utils.serialize import json_dumps, json_loads
|
from calibre.utils.serialize import json_dumps, json_loads
|
||||||
from calibre.ebooks.oeb.base import XPath
|
from css_selectors.select import Select, get_parsed_selector
|
||||||
from css_selectors.select import get_parsed_selector
|
|
||||||
|
|
||||||
|
|
||||||
def non_empty_validator(label, val):
|
def non_empty_validator(label, val):
|
||||||
@ -132,14 +132,80 @@ def validate_rule(rule):
|
|||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
def compile_rules(serialized_rules):
|
class Action:
    """Placeholder for one compiled action belonging to a transform rule.

    The constructor accepts the serialized action but does not interpret it
    yet. Instances are callable because ``Rule.__call__`` invokes every
    action as ``action(tag)`` and treats a truthy result as "the tag was
    modified"; without ``__call__`` that invocation raises ``TypeError``.
    """

    def __init__(self, serialized_action):
        # Nothing is compiled from the serialized form yet.
        pass

    def __call__(self, tag):
        """Apply the action to *tag*; return True only if *tag* was changed.

        No action behaviour is implemented yet, so the tag is left untouched
        and False ("not changed") is always returned.
        """
        return False
|
||||||
|
|
||||||
|
|
||||||
|
class Rule:
    """A transform rule compiled from its serialized (dict) form.

    The serialized rule must provide the keys ``match_type``, ``query`` and
    ``actions``. The match type decides how matching tags are selected:

    * ``xpath``         -- ``query`` is compiled with ``XPath``
    * ``*``             -- the fixed XPath expression ``//*`` is used
    * ``is`` / ``css``  -- ``query`` is used verbatim as a CSS selector
    * ``has_class``     -- CSS selector ``.<query>``
    * ``not_has_class`` -- CSS selector ``:not(.<query>)``

    Any other match type raises ``KeyError``. After construction,
    ``self.selector`` is the bound method (``xpath`` or ``css``) that returns
    the matching tags for a given root, and ``self.actions`` is a tuple of
    ``Action`` objects built from the serialized actions.
    """

    def __init__(self, serialized_rule):
        self.sel_type = 'xpath'
        match_type = serialized_rule['match_type']
        query = serialized_rule['query']

        if match_type in ('xpath', '*'):
            # Both of these are evaluated through a compiled XPath expression.
            expression = query if match_type == 'xpath' else '//*'
            self.xpath_selector = XPath(expression)
            self.selector = self.xpath
        elif match_type in ('is', 'css', 'has_class', 'not_has_class'):
            # All of these are expressed as a CSS selector string.
            if match_type == 'has_class':
                query = '.' + query
            elif match_type == 'not_has_class':
                query = f":not(.{query})"
            self.css_selector = query
            self.selector = self.css
        else:
            raise KeyError(f'Unknown match_type: {match_type}')

        self.actions = tuple(Action(a) for a in serialized_rule['actions'])

    def xpath(self, root):
        """Return whatever the compiled XPath selector yields for *root*."""
        return self.xpath_selector(root)

    def css(self, root):
        """Return a tuple of the tags under *root* matching the CSS selector."""
        matches = Select(root)(self.css_selector)
        return tuple(matches)

    def __call__(self, root):
        """Run every action on every matching tag.

        Returns True if at least one action reported a change, else False.
        All actions are run on all matched tags regardless of earlier results.
        """
        modified = False
        for element in self.selector(root):
            outcomes = [act(element) for act in self.actions]
            if any(outcomes):
                modified = True
        return modified
|
||||||
|
|
||||||
|
|
||||||
|
def transform_doc(root, rules):
    """Apply each rule in *rules* to *root*, in order.

    Every rule is called as ``rule(root)`` even after an earlier one has
    already reported a change. Returns True if any rule returned a truthy
    value, otherwise False.
    """
    # Build the full list first so that no rule is skipped by short-circuiting.
    outcomes = [bool(apply_rule(root)) for apply_rule in rules]
    return any(outcomes)
|
||||||
|
|
||||||
|
|
||||||
def transform_container(container, serialized_rules, names=()):
|
def transform_container(container, serialized_rules, names=()):
    """Apply the serialized transform rules to documents in *container*.

    If *names* is empty, every name in ``container.mime_map`` whose mime type
    is in ``OEB_DOCS`` is processed. Otherwise only the given names are
    considered, and any whose mime type is not in ``OEB_DOCS`` are skipped.

    Each processed document is obtained with ``container.parsed(name)`` and
    passed through ``transform_doc``; documents that changed are marked with
    ``container.dirty(name)``. Returns True if any document changed.
    """
    if not names:
        names = [
            name for name, mime in container.mime_map.items()
            if mime in OEB_DOCS
        ]

    any_changed = False
    compiled = tuple(map(Rule, serialized_rules))

    for name in names:
        # Explicitly supplied names may refer to non-document files.
        if container.mime_map.get(name) not in OEB_DOCS:
            continue
        if transform_doc(container.parsed(name), compiled):
            container.dirty(name)
            any_changed = True

    return any_changed
|
||||||
|
|
||||||
|
|
||||||
def rule_to_text(rule):
|
def rule_to_text(rule):
|
||||||
@ -172,7 +238,35 @@ def test(return_tests=False): # {{{
|
|||||||
ae = unittest.TestCase.assertEqual
|
ae = unittest.TestCase.assertEqual
|
||||||
|
|
||||||
def test_matching(self):
|
def test_matching(self):
    """Check that each match type selects the expected tags.

    A small document is parsed and, for every match type, the ids of the tags
    returned by ``Rule.selector`` are compared with the expected ids.
    """
    from html5_parser import parse
    root = parse(namespace_elements=True, html='''
<html id='root'>
<head id='head'></head>
<body id='body'>
<p class="one red" id='p1'>
<p class="two green" id='p2'>
''')
    all_ids = root.xpath('//*/@id')

    def q(mt, query=''):
        # Ids of all tags matched by a rule with no actions.
        r = Rule({'match_type': mt, 'query': query, 'actions': []})
        return [tag.get('id') for tag in r.selector(root)]

    def t(mt, query='', expected=None):
        # None sentinel instead of a shared mutable default list.
        if expected is None:
            expected = []
        self.ae(expected, q(mt, query))

    t('*', expected=all_ids)
    t('is', 'body', ['body'])
    t('is', 'p', ['p1', 'p2'])
    t('has_class', 'one', ['p1'])
    # Every tag except p1 lacks the class "one".
    ei = list(all_ids)
    ei.remove('p1')
    t('not_has_class', 'one', ei)
    t('css', '#body > p.red', ['p1'])
    t('xpath', '//h:body', ['body'])
|
||||||
|
|
||||||
def test_validate_rule(self):
|
def test_validate_rule(self):
|
||||||
def av(match_type='*', query='', atype='remove', adata=''):
|
def av(match_type='*', query='', atype='remove', adata=''):
|
||||||
|
@ -11,7 +11,7 @@ from qt.core import (
|
|||||||
|
|
||||||
from calibre import prepare_string_for_xml
|
from calibre import prepare_string_for_xml
|
||||||
from calibre.ebooks.html_transform_rules import (
|
from calibre.ebooks.html_transform_rules import (
|
||||||
ACTION_MAP, MATCH_TYPE_MAP, compile_rules, export_rules, import_rules,
|
ACTION_MAP, MATCH_TYPE_MAP, export_rules, import_rules,
|
||||||
validate_rule
|
validate_rule
|
||||||
)
|
)
|
||||||
from calibre.gui2 import choose_files, choose_save_file, elided_text, error_dialog
|
from calibre.gui2 import choose_files, choose_save_file, elided_text, error_dialog
|
||||||
@ -311,12 +311,12 @@ class Rules(RulesBase): # {{{
|
|||||||
|
|
||||||
class Tester(Dialog): # {{{
|
class Tester(Dialog): # {{{
|
||||||
|
|
||||||
DIALOG_TITLE = _('Test style transform rules')
|
DIALOG_TITLE = _('Test HTML transform rules')
|
||||||
PREFS_NAME = 'test-style-transform-rules'
|
PREFS_NAME = 'test-html-transform-rules'
|
||||||
LABEL = _('Enter a CSS stylesheet below to test')
|
LABEL = _('Enter an HTML document below to test')
|
||||||
|
|
||||||
def __init__(self, rules, parent=None):
|
def __init__(self, rules, parent=None):
|
||||||
self.rules = compile_rules(rules)
|
self.rules = rules
|
||||||
Dialog.__init__(self, self.DIALOG_TITLE, self.PREFS_NAME, parent=parent)
|
Dialog.__init__(self, self.DIALOG_TITLE, self.PREFS_NAME, parent=parent)
|
||||||
|
|
||||||
def setup_ui(self):
|
def setup_ui(self):
|
||||||
@ -326,7 +326,7 @@ class Tester(Dialog): # {{{
|
|||||||
self.la = la = QLabel(self.LABEL)
|
self.la = la = QLabel(self.LABEL)
|
||||||
l.addWidget(la)
|
l.addWidget(la)
|
||||||
self.css = t = TextEdit(self)
|
self.css = t = TextEdit(self)
|
||||||
t.load_text('/* %s */\n' % _('Enter CSS rules below and click the "Test" button'), 'css')
|
t.load_text('<!-- %s -->\n' % _('Enter the HTML below and click the "Test" button'), 'html')
|
||||||
la.setBuddy(t)
|
la.setBuddy(t)
|
||||||
c = t.textCursor()
|
c = t.textCursor()
|
||||||
c.movePosition(QTextCursor.MoveOperation.End)
|
c.movePosition(QTextCursor.MoveOperation.End)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user