From ea37193b6824767e4087d63ba8204611f5118878 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sat, 19 Jul 2014 00:56:47 +0530 Subject: [PATCH] Fix #1344061 [erroneus regex in preprocess.py](https://bugs.launchpad.net/calibre/+bug/1344061) --- src/calibre/ebooks/conversion/preprocess.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/conversion/preprocess.py b/src/calibre/ebooks/conversion/preprocess.py index 22e7c09cb2..d28cd7058b 100644 --- a/src/calibre/ebooks/conversion/preprocess.py +++ b/src/calibre/ebooks/conversion/preprocess.py @@ -271,7 +271,7 @@ class Dehyphenator(object): elif format == 'txt': intextmatch = re.compile(u'(?<=.{%i})(?P<firstpart>[^\W\-]+)(-|‐)(\u0020|\u0009)*(?P<wraptags>(\n(\u0020|\u0009)*)+)(?P<secondpart>[\w\d]+)'% length) # noqa elif format == 'individual_words': - intextmatch = re.compile(u'(?!<)(?P<firstpart>[^\W\-]+)(-|‐)\s*(?P<secondpart>\w+)(?![^<]*?>)') + intextmatch = re.compile(u'(?!<)(?P<firstpart>[^\W\-]+)(-|‐)\s*(?P<secondpart>\w+)(?![^<]*?>)', re.UNICODE) elif format == 'html_cleanup': intextmatch = re.compile(u'(?P<firstpart>[^\W\-]+)(-|‐)\s*(?=<)(?P<wraptags></span>\s*(</[iubp]>\s*<[iubp][^>]*>\s*)?<span[^>]*>|</[iubp]>\s*<[iubp][^>]*>)?\s*(?P<secondpart>[\w\d]+)') # noqa elif format == 'txt_cleanup':