From ade7c9280a2241fb033669e596a8b89f72743fda Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 20 Oct 2012 09:07:03 +0530 Subject: [PATCH] Faster implementation of namespace hack --- src/calibre/ebooks/oeb/stylizer.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/oeb/stylizer.py b/src/calibre/ebooks/oeb/stylizer.py index 1579dde481..6b82f2f801 100644 --- a/src/calibre/ebooks/oeb/stylizer.py +++ b/src/calibre/ebooks/oeb/stylizer.py @@ -125,11 +125,17 @@ class CaseInsensitiveAttributesTranslator(HTMLTranslator): (id_selector.id.lower())) ci_css_to_xpath = CaseInsensitiveAttributesTranslator().css_to_xpath -NULL_NAMESPACE_REGEX = re.compile(ur'''name\(\) = ['"]h:''') + +NULL_NAMESPACE_REGEX = re.compile(ur'''(name\(\) = ['"])h:''') def fix_namespace(raw): - ans = NULL_NAMESPACE_REGEX.sub(lambda - m:m.group().replace(u'h:', u''), raw) - return ans + ''' + cssselect uses name() = 'h:p' to select tags for some CSS selectors (e.g. + h|p+h|p). + However, since for us the XHTML namespace is the default namespace (with no + prefix), name() is the same as local-name(). So this is a hack to + workaround the problem. + ''' + return NULL_NAMESPACE_REGEX.sub(ur'\1', raw) class CSSSelector(object):