Fix a regression that broke the conversion of files that contain very long passages of text (more than 100MB worth). Fixes #809501 (memory error during e-book conversion)

2025-07-09 03:04:10 -04:00 · 2011-07-12 15:01:50 -06:00 · 2011-07-12 15:01:50 -06:00 · 928ba3bc1b
commit 928ba3bc1b
parent f0fdbab3ec
1 changed file with 5 additions and 1 deletion
--- a/src/calibre/ebooks/chardet/__init__.py
+++ b/src/calibre/ebooks/chardet/__init__.py
@@ -38,8 +38,12 @@ ENCODING_PATS = [
 ENTITY_PATTERN = re.compile(r'&(\S+?);')

 def strip_encoding_declarations(raw):
+    limit = 50*1024
     for pat in ENCODING_PATS:
-        raw = pat.sub('', raw)
+        prefix = raw[:limit]
+        suffix = raw[limit:]
+        prefix = pat.sub('', prefix)
+        raw = prefix + suffix
     return raw

 def substitute_entites(raw):