Fix a regression that broke the conversion of files that contain very long passages of text (more than 100MB worth). Fixes #809501 (memory error during e-book conversion)

This commit is contained in:
Kovid Goyal 2011-07-12 15:01:50 -06:00
parent f0fdbab3ec
commit 928ba3bc1b

View File

@@ -38,8 +38,12 @@ ENCODING_PATS = [
# Matches an entity-style reference: '&', then the shortest run of
# non-whitespace characters (captured as group 1), then ';'.
ENTITY_PATTERN = re.compile(r'&(\S+?);')
def strip_encoding_declarations(raw, limit=50*1024):
    '''
    Remove encoding declarations from the beginning of ``raw``.

    Each pattern in ``ENCODING_PATS`` is applied only to the first
    ``limit`` characters of ``raw``; the remainder is carried over
    unchanged. Running the substitutions over the entire text caused
    memory errors on very large documents (more than 100MB of text), so
    the search is deliberately confined to a short leading window.

    :param raw: The markup text to clean.
    :param limit: Size of the leading window that is searched for
        declarations. Defaults to 50KB.
    :return: ``raw`` with every match inside the leading window removed.
    '''
    # Split once, up front: the tail may be enormous, so it must not be
    # re-sliced and re-concatenated for every pattern.
    prefix, suffix = raw[:limit], raw[limit:]
    for pat in ENCODING_PATS:
        prefix = pat.sub('', prefix)
    return prefix + suffix
def substitute_entites(raw):