From 48a701012c389097a67f19865dea47bfe327c400 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 19 Nov 2014 15:36:51 +0530 Subject: [PATCH] Handle HTML entities in the builtin S&R funcs --- src/calibre/ebooks/oeb/polish/utils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/calibre/ebooks/oeb/polish/utils.py b/src/calibre/ebooks/oeb/polish/utils.py index fc3d067af6..7982bbc163 100644 --- a/src/calibre/ebooks/oeb/polish/utils.py +++ b/src/calibre/ebooks/oeb/polish/utils.py @@ -9,7 +9,7 @@ __copyright__ = '2013, Kovid Goyal ' import re, os from bisect import bisect -from calibre import guess_type as _guess_type +from calibre import guess_type as _guess_type, prepare_string_for_xml, replace_entities def guess_type(x): return _guess_type(x)[0] or 'application/octet-stream' @@ -172,12 +172,16 @@ def parse_css(data, fname='', is_declaration=False, decode=None, log_lev data = parser.parseString(data, href=fname, validate=False) return data -def apply_func_to_match_groups(match, func=icu_upper): +def handle_entities(text, func): + return prepare_string_for_xml(func(replace_entities(text))) + +def apply_func_to_match_groups(match, func=icu_upper, handle_entities=handle_entities): '''Apply the specified function to individual groups in the match object (the result of re.search() or the whole match if no groups were defined. Returns the replaced string.''' found_groups = False i = 0 parts, pos = [], match.start() + f = lambda text:handle_entities(text, func) while True: i += 1 try: @@ -187,10 +191,10 @@ def apply_func_to_match_groups(match, func=icu_upper): found_groups = True if start > -1: parts.append(match.string[pos:start]) - parts.append(func(match.string[start:end])) + parts.append(f(match.string[start:end])) pos = end if not found_groups: - return func(match.group()) + return f(match.group()) parts.append(match.string[pos:match.end()]) return ''.join(parts)