Conversion: Fix a hang caused by long sequences of non-word characters when heuristics are enabled, either explicitly or for some input formats such as TXT. Fixes #1919260 [Some magic text takes forever to convert](https://bugs.launchpad.net/calibre/+bug/1919260)

This commit is contained in:
Kovid Goyal 2021-03-16 08:38:59 +05:30
parent 32f2742e42
commit a8619a5967
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -583,7 +583,7 @@ class HeuristicProcessor(object):
def detect_scene_breaks(self, html):
scene_break_regex = self.line_open+'(?!('+self.common_in_text_beginnings+'|.*?'+self.common_in_text_endings+ \
-'<))(?P<break>((?P<break_char>((?!\\s)\\W))\\s*(?P=break_char)?)+)\\s*'+self.line_close
+'<))(?P<break>((?P<break_char>((?!\\s)\\W))\\s*(?P=break_char)?){1,10})\\s*'+self.line_close
scene_breaks = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
html = scene_breaks.sub(self.scene_break_open+'\\g<break>'+'</p>', html)
return html