mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Handle HTML entities in the builtin S&R funcs
This commit is contained in:
parent
ef3509ebc5
commit
48a701012c
@ -9,7 +9,7 @@ __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
import re, os
|
||||
from bisect import bisect
|
||||
|
||||
from calibre import guess_type as _guess_type
|
||||
from calibre import guess_type as _guess_type, prepare_string_for_xml, replace_entities
|
||||
|
||||
def guess_type(x):
    '''Return the first element of the result of calibre's guess_type for x,
    or 'application/octet-stream' when that element is empty/None.'''
    guessed = _guess_type(x)[0]
    if guessed:
        return guessed
    return 'application/octet-stream'
|
||||
@ -172,12 +172,16 @@ def parse_css(data, fname='<string>', is_declaration=False, decode=None, log_lev
|
||||
data = parser.parseString(data, href=fname, validate=False)
|
||||
return data
|
||||
|
||||
def apply_func_to_match_groups(match, func=icu_upper):
|
||||
def handle_entities(text, func):
    '''Run func on text after passing it through replace_entities(), then
    pass the result through prepare_string_for_xml() before returning it.'''
    decoded = replace_entities(text)
    transformed = func(decoded)
    return prepare_string_for_xml(transformed)
|
||||
|
||||
def apply_func_to_match_groups(match, func=icu_upper, handle_entities=handle_entities):
|
||||
'''Apply the specified function to individual groups in the match object (the result of re.search()), or
|
||||
the whole match if no groups were defined. Returns the replaced string.'''
|
||||
found_groups = False
|
||||
i = 0
|
||||
parts, pos = [], match.start()
|
||||
f = lambda text:handle_entities(text, func)
|
||||
while True:
|
||||
i += 1
|
||||
try:
|
||||
@ -187,10 +191,10 @@ def apply_func_to_match_groups(match, func=icu_upper):
|
||||
found_groups = True
|
||||
if start > -1:
|
||||
parts.append(match.string[pos:start])
|
||||
parts.append(func(match.string[start:end]))
|
||||
parts.append(f(match.string[start:end]))
|
||||
pos = end
|
||||
if not found_groups:
|
||||
return func(match.group())
|
||||
return f(match.group())
|
||||
parts.append(match.string[pos:match.end()])
|
||||
return ''.join(parts)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user