mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #991380 (RegEx broken, Fatal Error Converting txt -> mobi)
This commit is contained in:
parent
7aeb6157f6
commit
cb4beb395a
@ -179,8 +179,12 @@ class HeuristicProcessor(object):
|
|||||||
for match in re.finditer(pat, search_text):
|
for match in re.finditer(pat, search_text):
|
||||||
ital_string = str(match.group('words'))
|
ital_string = str(match.group('words'))
|
||||||
#self.log.debug("italicising "+str(match.group(0))+" with <i>"+ital_string+"</i>")
|
#self.log.debug("italicising "+str(match.group(0))+" with <i>"+ital_string+"</i>")
|
||||||
html = re.sub(re.escape(str(match.group(0))), '<i>%s</i>' % ital_string, html)
|
try:
|
||||||
|
html = re.sub(re.escape(str(match.group(0))), '<i>%s</i>' % ital_string, html)
|
||||||
|
except OverflowError:
|
||||||
|
# match.group(0) was too large to be compiled into a regex
|
||||||
|
continue
|
||||||
|
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def markup_chapters(self, html, wordcount, blanks_between_paragraphs):
|
def markup_chapters(self, html, wordcount, blanks_between_paragraphs):
|
||||||
@ -319,13 +323,13 @@ class HeuristicProcessor(object):
|
|||||||
'''
|
'''
|
||||||
Unwraps lines based on line length and punctuation
|
Unwraps lines based on line length and punctuation
|
||||||
supports a range of html markup and text files
|
supports a range of html markup and text files
|
||||||
|
|
||||||
the lookahead regex below is meant to look for any non-full stop characters - punctuation
|
the lookahead regex below is meant to look for any non-full stop characters - punctuation
|
||||||
characters which can be used as a full stop should *not* be added below - e.g. ?!“”. etc
|
characters which can be used as a full stop should *not* be added below - e.g. ?!“”. etc
|
||||||
the reason for this is to prevent false positive wrapping. False positives are more
|
the reason for this is to prevent false positive wrapping. False positives are more
|
||||||
difficult to detect than false negatives during a manual review of the doc
|
difficult to detect than false negatives during a manual review of the doc
|
||||||
|
|
||||||
This function intentionally leaves hyphenated content alone as that is handled by the
|
This function intentionally leaves hyphenated content alone as that is handled by the
|
||||||
dehyphenate routine in a separate step
|
dehyphenate routine in a separate step
|
||||||
'''
|
'''
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user