Cache XPath() invocations and make indent-preserving insert reusable

This commit is contained in:
Kovid Goyal 2025-02-17 11:28:44 +05:30
parent cc19aa0ae6
commit ec5584d7b5
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 30 additions and 22 deletions

View File

@ -12,6 +12,7 @@ import os
import re import re
import sys import sys
from collections import defaultdict from collections import defaultdict
from functools import lru_cache
from itertools import count from itertools import count
from operator import attrgetter from operator import attrgetter
@ -393,6 +394,7 @@ def isqname(name):
return name and QNAME_RE.match(name) is not None return name and QNAME_RE.match(name) is not None
@lru_cache(128)
def XPath(expr): def XPath(expr):
return etree.XPath(expr, namespaces=XPNSMAP) return etree.XPath(expr, namespaces=XPNSMAP)

View File

@ -47,7 +47,7 @@ from calibre.ebooks.oeb.base import (
from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html
from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook
from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak
from calibre.ebooks.oeb.polish.utils import OEB_FONTS, CommentFinder, PositionFinder, adjust_mime_for_epub, guess_type, parse_css from calibre.ebooks.oeb.polish.utils import OEB_FONTS, CommentFinder, PositionFinder, adjust_mime_for_epub, guess_type, insert_self_closing, parse_css
from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile
from calibre.utils.filenames import hardlink_file, nlinks_file, retry_on_fail from calibre.utils.filenames import hardlink_file, nlinks_file, retry_on_fail
from calibre.utils.ipc.simple_worker import WorkerError, fork_job from calibre.utils.ipc.simple_worker import WorkerError, fork_job
@ -942,27 +942,7 @@ class Container(ContainerBase): # {{{
def insert_into_xml(self, parent, item, index=None): def insert_into_xml(self, parent, item, index=None):
'''Insert item into parent (or append if index is None), fixing '''Insert item into parent (or append if index is None), fixing
indentation. Only works with self closing items.''' indentation. Only works with self closing items.'''
if index is None: insert_self_closing(parent, item, index)
parent.append(item)
else:
parent.insert(index, item)
idx = parent.index(item)
if idx == 0:
item.tail = parent.text
# If this is the only child of this parent element, we need a
# little extra work as we have gone from a self-closing <foo />
# element to <foo><item /></foo>
if len(parent) == 1:
sibling = parent.getprevious()
if sibling is None:
# Give up!
return
parent.text = sibling.text
item.tail = sibling.tail
else:
item.tail = parent[idx-1].tail
if idx == len(parent)-1:
parent[idx-1].tail = parent.text
def opf_get_or_create(self, name): def opf_get_or_create(self, name):
''' Convenience method to either return the first XML element with the ''' Convenience method to either return the first XML element with the

View File

@ -291,3 +291,29 @@ def extract(elem):
p[idx-1].tail = (p[idx-1].tail or '') + elem.tail p[idx-1].tail = (p[idx-1].tail or '') + elem.tail
else: else:
p.text = (p.text or '') + elem.tail p.text = (p.text or '') + elem.tail
def insert_self_closing(parent, item, index=None):
    '''Insert item into parent (or append if index is None), fixing
    indentation. Only works with self closing items.

    The whitespace that controls indentation lives in the ``text`` of the
    parent and the ``tail`` of each child, so after inserting ``item`` those
    values are shuffled so the new child is indented like its siblings.

    :param parent: The element that receives ``item``. It is modified in place.
    :param item: The element to insert. Its ``tail`` is overwritten.
    :param index: Position to insert at, or None to append.
    '''
    if index is None:
        parent.append(item)
    else:
        parent.insert(index, item)
    # Look the position up again rather than trusting ``index``, which may be
    # None (append) or may not equal the final position.
    idx = parent.index(item)
    if idx == 0:
        # New first child: the whitespace that followed the parent's opening
        # tag now follows the item instead.
        item.tail = parent.text
        # If this is the only child of this parent element, we need a
        # little extra work as we have gone from a self-closing <foo />
        # element to <foo><item /></foo>
        if len(parent) == 1:
            sibling = parent.getprevious()
            if sibling is None:
                # Give up! There is no preceding element to copy the
                # whitespace from.
                return
            parent.text = sibling.text
            item.tail = sibling.tail
    else:
        # Take over the whitespace that followed the preceding sibling.
        item.tail = parent[idx-1].tail
        if idx == len(parent)-1:
            # The item is now the last child, so the preceding sibling is no
            # longer followed by the closing tag; give it the whitespace
            # stored in parent.text instead.
            parent[idx-1].tail = parent.text