Handle HTML entities in the builtin S&R funcs

This commit is contained in:
Kovid Goyal 2014-11-19 15:36:51 +05:30
parent ef3509ebc5
commit 48a701012c

View File

@ -9,7 +9,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re, os import re, os
from bisect import bisect from bisect import bisect
from calibre import guess_type as _guess_type from calibre import guess_type as _guess_type, prepare_string_for_xml, replace_entities
def guess_type(x): def guess_type(x):
return _guess_type(x)[0] or 'application/octet-stream' return _guess_type(x)[0] or 'application/octet-stream'
@ -172,12 +172,16 @@ def parse_css(data, fname='<string>', is_declaration=False, decode=None, log_lev
data = parser.parseString(data, href=fname, validate=False) data = parser.parseString(data, href=fname, validate=False)
return data return data
def apply_func_to_match_groups(match, func=icu_upper): def handle_entities(text, func):
return prepare_string_for_xml(func(replace_entities(text)))
def apply_func_to_match_groups(match, func=icu_upper, handle_entities=handle_entities):
'''Apply the specified function to individual groups in the match object (the result of re.search() or '''Apply the specified function to individual groups in the match object (the result of re.search() or
the whole match if no groups were defined. Returns the replaced string.''' the whole match if no groups were defined. Returns the replaced string.'''
found_groups = False found_groups = False
i = 0 i = 0
parts, pos = [], match.start() parts, pos = [], match.start()
f = lambda text:handle_entities(text, func)
while True: while True:
i += 1 i += 1
try: try:
@ -187,10 +191,10 @@ def apply_func_to_match_groups(match, func=icu_upper):
found_groups = True found_groups = True
if start > -1: if start > -1:
parts.append(match.string[pos:start]) parts.append(match.string[pos:start])
parts.append(func(match.string[start:end])) parts.append(f(match.string[start:end]))
pos = end pos = end
if not found_groups: if not found_groups:
return func(match.group()) return f(match.group())
parts.append(match.string[pos:match.end()]) parts.append(match.string[pos:match.end()])
return ''.join(parts) return ''.join(parts)