From cb4beb395a3b73c3e4e97ae3667e6a1d82ef9027 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 2 May 2012 22:07:29 +0530 Subject: [PATCH] Fix #991380 (RegEx broken, Fatal Error Converting txt -> mobi) --- src/calibre/ebooks/conversion/utils.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/conversion/utils.py b/src/calibre/ebooks/conversion/utils.py index 2c1a5cd4d3..86a4668b9b 100644 --- a/src/calibre/ebooks/conversion/utils.py +++ b/src/calibre/ebooks/conversion/utils.py @@ -179,8 +179,12 @@ class HeuristicProcessor(object): for match in re.finditer(pat, search_text): ital_string = str(match.group('words')) #self.log.debug("italicising "+str(match.group(0))+" with "+ital_string+"") - html = re.sub(re.escape(str(match.group(0))), '%s' % ital_string, html) - + try: + html = re.sub(re.escape(str(match.group(0))), '%s' % ital_string, html) + except OverflowError: + # match.group(0) was too large to be compiled into a regex + continue + return html def markup_chapters(self, html, wordcount, blanks_between_paragraphs): @@ -319,13 +323,13 @@ class HeuristicProcessor(object): ''' Unwraps lines based on line length and punctuation supports a range of html markup and text files - + the lookahead regex below is meant look for any non-full stop characters - punctuation characters which can be used as a full stop should *not* be added below - e.g. ?!“”. etc the reason for this is to prevent false positive wrapping. False positives are more difficult to detect than false negatives during a manual review of the doc - - This function intentionally leaves hyphenated content alone as that is handled by the + + This function intentionally leaves hyphenated content alone as that is handled by the dehyphenate routine in a separate step '''