mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #1344061 [erroneous regex in preprocess.py](https://bugs.launchpad.net/calibre/+bug/1344061)
This commit is contained in:
parent
49b0726efa
commit
ea37193b68
@ -271,7 +271,7 @@ class Dehyphenator(object):
|
||||
elif format == 'txt':
|
||||
intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length) # noqa
|
||||
elif format == 'individual_words':
|
||||
intextmatch = re.compile(u'(?!<)(?P<firstpart>[^\W\-]+)(-|‐)\s*(?P<secondpart>\w+)(?![^<]*?>)')
|
||||
intextmatch = re.compile(u'(?!<)(?P<firstpart>[^\W\-]+)(-|‐)\s*(?P<secondpart>\w+)(?![^<]*?>)', re.UNICODE)
|
||||
elif format == 'html_cleanup':
|
||||
intextmatch = re.compile(u'(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)') # noqa
|
||||
elif format == 'txt_cleanup':
|
||||
|
Loading…
x
Reference in New Issue
Block a user