diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py
index ef092f7954..3ff816b3bf 100644
--- a/src/calibre/ebooks/conversion/preprocess.py
+++ b/src/calibre/ebooks/conversion/preprocess.py
@@ -220,7 +220,7 @@ class Dehyphenator(object):
self.html = html
self.format = format
if format == 'html':
- intextmatch = re.compile(u'(?<=.{%i})(?P ]*>\s* |[iub]>\s* \s*<[iub]>)\s*(?P
\s*", "\n
", html) - + ###### Check Markup ###### # # some lit files don't have any
tags or equivalent (generally just plain text between @@ -191,10 +191,10 @@ class PreProcessor(object): n_lookahead_close = ")" default_title = r"(\s*[\w\'\"-]+){1,5}?(?=<)" - typical_chapters = r".?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}" - numeric_chapters = r".?(\d+\.?|(CHAPTER\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}))\s*" - uppercase_chapters = r"\s*.?([A-Z#]+(\s|-){0,3}){1,5}\s*" - numeric_titles = r".?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*" + typical_chapters = r"[^'\"]?(Introduction|Synopsis|Acknowledgements|Chapter|Kapitel|Epilogue|Volume\s|Prologue|Book\s|Part\s|Dedication)\s*([\d\w-]+\:?\s*){0,4}" + numeric_chapters = r"[^'\"]?(\d+\.?|CHAPTER)\s*([\dA-Z\-\'\"\?\.!#,]+\s*){1,10}\s*" + uppercase_chapters = r"\s*[^'\"]?([A-Z#]+(\s|-){0,3}){1,5}\s*" + numeric_titles = r"[^'\"]?(\d+\.?\s+([\d\w-]+\:?\'?-?\s?){0,5})\s*" emphasized_lines = r"]*>\s*(]*>)?\s*(?!([*#•]+\s*)+)(\s*(?=[\w#\-*\s]+<)([\w#-*]+\s*){1,5}\s*)()?\s*" full_chapter_line = chapter_line_open+chapter_header_open+typical_chapters+chapter_header_close+chapter_line_close