From 8311b262050e5105d1c7e35ff4509e437f9fc7ea Mon Sep 17 00:00:00 2001 From: Eli Schwartz Date: Wed, 1 May 2019 10:54:40 -0400 Subject: [PATCH] wip: tcr compression regex flags do not need "u" when the search string does not make use of the feature In Python 3, re.U is the default for unicode (str) patterns. For bytes patterns, which is what we use, specifying it is a fatal error (ValueError: cannot use UNICODE flag with a bytes pattern). --- src/calibre/ebooks/compression/tcr.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/calibre/ebooks/compression/tcr.py b/src/calibre/ebooks/compression/tcr.py index 565399eb4d..6abd243fbb 100644 --- a/src/calibre/ebooks/compression/tcr.py +++ b/src/calibre/ebooks/compression/tcr.py @@ -1,5 +1,4 @@ # -*- coding: utf-8 -*- - from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL 3' @@ -35,10 +34,10 @@ class TCRCompressor(object): The intent is to create more unused codes. ''' possible_codes = [] - a_code = set(re.findall(b'(?msu).', self.coded_txt)) + a_code = set(re.findall(b'(?ms).', self.coded_txt)) for code in a_code: - single_code = set(re.findall(b'(?msu)%s.' % re.escape(code), self.coded_txt)) + single_code = set(re.findall(b'(?ms)%s.' % re.escape(code), self.coded_txt)) if len(single_code) == 1: possible_codes.append(single_code.pop()) @@ -60,7 +59,7 @@ class TCRCompressor(object): ''' Create new codes from codes that occur in pairs often. ''' - possible_new_codes = list(set(re.findall(b'(?msu)..', self.coded_txt))) + possible_new_codes = list(set(re.findall(b'(?ms)..', self.coded_txt))) new_codes_count = [] for c in possible_new_codes: @@ -77,7 +76,7 @@ class TCRCompressor(object): def compress(self, txt): self._reset() - self.codes = list(set(re.findall(b'(?msu).', txt))) + self.codes = list(set(re.findall(b'(?ms).', txt))) # Replace the text with their corresponding code for c in txt: