Merge branch 'py3' of https://github.com/eli-schwartz/calibre

2025-12-31 09:10:25 -05:00 · 2019-09-13 07:16:13 +05:30 · 2019-09-13 07:16:13 +05:30 · 1bb9f07886
commit 1bb9f07886
parent 2866bffecf 2876724325
1 changed files with 14 additions and 12 deletions
--- a/src/calibre/ebooks/compression/tcr.py
+++ b/src/calibre/ebooks/compression/tcr.py
@ -1,5 +1,4 @@
 # -*- coding: utf-8 -*-
-
 from __future__ import absolute_import, division, print_function, unicode_literals

 __license__ = 'GPL 3'
@ -7,7 +6,7 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
 __docformat__ = 'restructuredtext en'

 import re
-from polyglot.builtins import range, int_to_byte
+from polyglot.builtins import int_to_byte, is_py3, range


 class TCRCompressor(object):
@ -35,16 +34,18 @@ class TCRCompressor(object):
        The intent is to create more unused codes.
        '''
        possible_codes = []
-        a_code = set(re.findall(b'(?msu).', self.coded_txt))
+        a_code = set(re.findall(b'(?ms).', self.coded_txt))

        for code in a_code:
-            single_code = set(re.findall(b'(?msu)%s.' % re.escape(code), self.coded_txt))
+            single_code = set(re.findall(b'(?ms)%s.' % re.escape(code), self.coded_txt))
            if len(single_code) == 1:
                possible_codes.append(single_code.pop())

        for code in possible_codes:
+            if not is_py3:
+                code = bytearray(code)
            self.coded_txt = self.coded_txt.replace(code, code[0:1])
-            self.codes[ord(code[0:1])] = b'%s%s' % (self.codes[ord(code[0:1])], self.codes[ord(code[1:2])])
+            self.codes[code[0]] = b'%s%s' % (self.codes[code[0]], self.codes[code[1]])

    def _free_unused_codes(self):
        '''
@ -60,7 +61,7 @@ class TCRCompressor(object):
        '''
        Create new codes from codes that occur in pairs often.
        '''
-        possible_new_codes = list(set(re.findall(b'(?msu)..', self.coded_txt)))
+        possible_new_codes = list(set(re.findall(b'(?ms)..', self.coded_txt)))
        new_codes_count = []

        for c in possible_new_codes:
@ -77,11 +78,12 @@ class TCRCompressor(object):
    def compress(self, txt):
        self._reset()

-        self.codes = list(set(re.findall(b'(?msu).', txt)))
+        self.codes = list(set(re.findall(b'(?ms).', txt)))

        # Replace the text with their corresponding code
-        for c in txt:
-            self.coded_txt += int_to_byte(self.codes.index(c))
+        # FIXME: python3 is native bytearray, but all we want are bytes
+        for c in bytearray(txt):
+            self.coded_txt += int_to_byte(self.codes.index(int_to_byte(c)))

        # Zero the unused codes and record which are unused.
        for i in range(len(self.codes), 256):
@ -96,9 +98,9 @@ class TCRCompressor(object):
                unused_code = self.unused_codes.pop()
                # Take the last possible codes and split it into individual
                # codes. The last possible code is the most often occurring.
-                code1, code2 = possible_codes.pop()
-                self.codes[unused_code] = b'%s%s' % (self.codes[ord(code1)], self.codes[ord(code2)])
-                self.coded_txt = self.coded_txt.replace(b'%s%s' % (code1, code2), int_to_byte(unused_code))
+                code = possible_codes.pop()
+                self.codes[unused_code] = b'%s%s' % (self.codes[ord(code[0:1])], self.codes[ord(code[1:2])])
+                self.coded_txt = self.coded_txt.replace(code, int_to_byte(unused_code))
            self._combine_codes()
            self._free_unused_codes()
            possible_codes = self._new_codes()