mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Conversion: Fix a hang caused by long sequences of non-word characters when heuristics are enabled, either explicitly or for some input formats such as TXT. Fixes #1919260 [Some magic text takes forever to convert](https://bugs.launchpad.net/calibre/+bug/1919260)
This commit is contained in:
parent
32f2742e42
commit
a8619a5967
@@ -583,7 +583,7 @@ class HeuristicProcessor(object):
|
||||
|
||||
def detect_scene_breaks(self, html):
|
||||
scene_break_regex = self.line_open+'(?!('+self.common_in_text_beginnings+'|.*?'+self.common_in_text_endings+ \
|
||||
'<))(?P<break>((?P<break_char>((?!\\s)\\W))\\s*(?P=break_char)?)+)\\s*'+self.line_close
|
||||
'<))(?P<break>((?P<break_char>((?!\\s)\\W))\\s*(?P=break_char)?){1,10})\\s*'+self.line_close
|
||||
scene_breaks = re.compile(r'%s' % scene_break_regex, re.IGNORECASE|re.UNICODE)
|
||||
html = scene_breaks.sub(self.scene_break_open+'\\g<break>'+'</p>', html)
|
||||
return html
|
||||
|
Loading…
x
Reference in New Issue
Block a user