From ec5584d7b56b7a9c3f8411b48331a98c98010f8c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 17 Feb 2025 11:28:44 +0530 Subject: [PATCH] Cache XPath() invocations and make insert preserving indent re-useable --- src/calibre/ebooks/oeb/base.py | 2 ++ src/calibre/ebooks/oeb/polish/container.py | 24 ++------------------ src/calibre/ebooks/oeb/polish/utils.py | 26 ++++++++++++++++++++++ 3 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index e0e3d5a326..1192c870d0 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -12,6 +12,7 @@ import os import re import sys from collections import defaultdict +from functools import lru_cache from itertools import count from operator import attrgetter @@ -393,6 +394,7 @@ def isqname(name): return name and QNAME_RE.match(name) is not None +@lru_cache(128) def XPath(expr): return etree.XPath(expr, namespaces=XPNSMAP) diff --git a/src/calibre/ebooks/oeb/polish/container.py b/src/calibre/ebooks/oeb/polish/container.py index 9e41a1f1a5..1a5b117f8f 100644 --- a/src/calibre/ebooks/oeb/polish/container.py +++ b/src/calibre/ebooks/oeb/polish/container.py @@ -47,7 +47,7 @@ from calibre.ebooks.oeb.base import ( from calibre.ebooks.oeb.parse_utils import NotHTML, parse_html from calibre.ebooks.oeb.polish.errors import DRMError, InvalidBook from calibre.ebooks.oeb.polish.parsing import parse as parse_html_tweak -from calibre.ebooks.oeb.polish.utils import OEB_FONTS, CommentFinder, PositionFinder, adjust_mime_for_epub, guess_type, parse_css +from calibre.ebooks.oeb.polish.utils import OEB_FONTS, CommentFinder, PositionFinder, adjust_mime_for_epub, guess_type, insert_self_closing, parse_css from calibre.ptempfile import PersistentTemporaryDirectory, PersistentTemporaryFile from calibre.utils.filenames import hardlink_file, nlinks_file, retry_on_fail from calibre.utils.ipc.simple_worker import WorkerError, fork_job @@ -942,27 +942,7 
@@ class Container(ContainerBase): # {{{ def insert_into_xml(self, parent, item, index=None): '''Insert item into parent (or append if index is None), fixing indentation. Only works with self closing items.''' - if index is None: - parent.append(item) - else: - parent.insert(index, item) - idx = parent.index(item) - if idx == 0: - item.tail = parent.text - # If this is the only child of this parent element, we need a - # little extra work as we have gone from a self-closing <a/> - # element to <a></a> - if len(parent) == 1: - sibling = parent.getprevious() - if sibling is None: - # Give up! - return - parent.text = sibling.text - item.tail = sibling.tail - else: - item.tail = parent[idx-1].tail - if idx == len(parent)-1: - parent[idx-1].tail = parent.text + insert_self_closing(parent, item, index) def opf_get_or_create(self, name): ''' Convenience method to either return the first XML element with the diff --git a/src/calibre/ebooks/oeb/polish/utils.py b/src/calibre/ebooks/oeb/polish/utils.py index 2313d8c28c..6cccf11d53 100644 --- a/src/calibre/ebooks/oeb/polish/utils.py +++ b/src/calibre/ebooks/oeb/polish/utils.py @@ -291,3 +291,29 @@ def extract(elem): p[idx-1].tail = (p[idx-1].tail or '') + elem.tail else: p.text = (p.text or '') + elem.tail + + +def insert_self_closing(parent, item, index=None): + '''Insert item into parent (or append if index is None), fixing + indentation. Only works with self closing items.''' + if index is None: + parent.append(item) + else: + parent.insert(index, item) + idx = parent.index(item) + if idx == 0: + item.tail = parent.text + # If this is the only child of this parent element, we need a + # little extra work as we have gone from a self-closing <a/> + # element to <a></a> + if len(parent) == 1: + sibling = parent.getprevious() + if sibling is None: + # Give up! + return + parent.text = sibling.text + item.tail = sibling.tail + else: + item.tail = parent[idx-1].tail + if idx == len(parent)-1: + parent[idx-1].tail = parent.text