From 928ba3bc1b6445ee04b3f3ce9e702116ed9eddaa Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Tue, 12 Jul 2011 15:01:50 -0600
Subject: [PATCH] Fix a regression that broke the conversion of files that contain very long passages of text (more than 100MB worth). Fixes #809501 (memory error during e-book conversion)

---
 src/calibre/ebooks/chardet/__init__.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/chardet/__init__.py b/src/calibre/ebooks/chardet/__init__.py
index 604cbdd360..95a44f8e56 100644
--- a/src/calibre/ebooks/chardet/__init__.py
+++ b/src/calibre/ebooks/chardet/__init__.py
@@ -38,8 +38,12 @@ ENCODING_PATS = [
 ENTITY_PATTERN = re.compile(r'&(\S+?);')
 
 def strip_encoding_declarations(raw):
+    limit = 50*1024
     for pat in ENCODING_PATS:
-        raw = pat.sub('', raw)
+        prefix = raw[:limit]
+        suffix = raw[limit:]
+        prefix = pat.sub('', prefix)
+        raw = prefix + suffix
     return raw
 
 def substitute_entites(raw):