mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Merge branch 'py3' of https://github.com/eli-schwartz/calibre
This commit is contained in:
commit
1bb9f07886
@ -1,5 +1,4 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||||
|
|
||||||
__license__ = 'GPL 3'
|
__license__ = 'GPL 3'
|
||||||
@ -7,7 +6,7 @@ __copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
|||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import re
|
import re
|
||||||
from polyglot.builtins import range, int_to_byte
|
from polyglot.builtins import int_to_byte, is_py3, range
|
||||||
|
|
||||||
|
|
||||||
class TCRCompressor(object):
|
class TCRCompressor(object):
|
||||||
@ -35,16 +34,18 @@ class TCRCompressor(object):
|
|||||||
The intent is to create more unused codes.
|
The intent is to create more unused codes.
|
||||||
'''
|
'''
|
||||||
possible_codes = []
|
possible_codes = []
|
||||||
a_code = set(re.findall(b'(?msu).', self.coded_txt))
|
a_code = set(re.findall(b'(?ms).', self.coded_txt))
|
||||||
|
|
||||||
for code in a_code:
|
for code in a_code:
|
||||||
single_code = set(re.findall(b'(?msu)%s.' % re.escape(code), self.coded_txt))
|
single_code = set(re.findall(b'(?ms)%s.' % re.escape(code), self.coded_txt))
|
||||||
if len(single_code) == 1:
|
if len(single_code) == 1:
|
||||||
possible_codes.append(single_code.pop())
|
possible_codes.append(single_code.pop())
|
||||||
|
|
||||||
for code in possible_codes:
|
for code in possible_codes:
|
||||||
|
if not is_py3:
|
||||||
|
code = bytearray(code)
|
||||||
self.coded_txt = self.coded_txt.replace(code, code[0:1])
|
self.coded_txt = self.coded_txt.replace(code, code[0:1])
|
||||||
self.codes[ord(code[0:1])] = b'%s%s' % (self.codes[ord(code[0:1])], self.codes[ord(code[1:2])])
|
self.codes[code[0]] = b'%s%s' % (self.codes[code[0]], self.codes[code[1]])
|
||||||
|
|
||||||
def _free_unused_codes(self):
|
def _free_unused_codes(self):
|
||||||
'''
|
'''
|
||||||
@ -60,7 +61,7 @@ class TCRCompressor(object):
|
|||||||
'''
|
'''
|
||||||
Create new codes from codes that occur in pairs often.
|
Create new codes from codes that occur in pairs often.
|
||||||
'''
|
'''
|
||||||
possible_new_codes = list(set(re.findall(b'(?msu)..', self.coded_txt)))
|
possible_new_codes = list(set(re.findall(b'(?ms)..', self.coded_txt)))
|
||||||
new_codes_count = []
|
new_codes_count = []
|
||||||
|
|
||||||
for c in possible_new_codes:
|
for c in possible_new_codes:
|
||||||
@ -77,11 +78,12 @@ class TCRCompressor(object):
|
|||||||
def compress(self, txt):
|
def compress(self, txt):
|
||||||
self._reset()
|
self._reset()
|
||||||
|
|
||||||
self.codes = list(set(re.findall(b'(?msu).', txt)))
|
self.codes = list(set(re.findall(b'(?ms).', txt)))
|
||||||
|
|
||||||
# Replace the text with their corresponding code
|
# Replace the text with their corresponding code
|
||||||
for c in txt:
|
# FIXME: python3 is native bytearray, but all we want are bytes
|
||||||
self.coded_txt += int_to_byte(self.codes.index(c))
|
for c in bytearray(txt):
|
||||||
|
self.coded_txt += int_to_byte(self.codes.index(int_to_byte(c)))
|
||||||
|
|
||||||
# Zero the unused codes and record which are unused.
|
# Zero the unused codes and record which are unused.
|
||||||
for i in range(len(self.codes), 256):
|
for i in range(len(self.codes), 256):
|
||||||
@ -96,9 +98,9 @@ class TCRCompressor(object):
|
|||||||
unused_code = self.unused_codes.pop()
|
unused_code = self.unused_codes.pop()
|
||||||
# Take the last possible codes and split it into individual
|
# Take the last possible codes and split it into individual
|
||||||
# codes. The last possible code is the most often occurring.
|
# codes. The last possible code is the most often occurring.
|
||||||
code1, code2 = possible_codes.pop()
|
code = possible_codes.pop()
|
||||||
self.codes[unused_code] = b'%s%s' % (self.codes[ord(code1)], self.codes[ord(code2)])
|
self.codes[unused_code] = b'%s%s' % (self.codes[ord(code[0:1])], self.codes[ord(code[1:2])])
|
||||||
self.coded_txt = self.coded_txt.replace(b'%s%s' % (code1, code2), int_to_byte(unused_code))
|
self.coded_txt = self.coded_txt.replace(code, int_to_byte(unused_code))
|
||||||
self._combine_codes()
|
self._combine_codes()
|
||||||
self._free_unused_codes()
|
self._free_unused_codes()
|
||||||
possible_codes = self._new_codes()
|
possible_codes = self._new_codes()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user