Merge branch 'py3' of https://github.com/eli-schwartz/calibre

2025-11-30 18:25:03 -05:00 · 2019-04-11 08:58:25 +05:30 · 2019-04-11 08:58:25 +05:30 · 0e1a89c338
commit 0e1a89c338
parent 75d3542287 92052e79d4
2 changed files with 18 additions and 18 deletions
--- a/src/calibre/ebooks/mobi/reader/mobi6.py
+++ b/src/calibre/ebooks/mobi/reader/mobi6.py
@@ -79,17 +79,17 @@ class MobiReader(object):
            stream = open(filename_or_stream, 'rb')

        raw = stream.read()
-        if raw.startswith('TPZ'):
+        if raw.startswith(b'TPZ'):
            raise TopazError(_('This is an Amazon Topaz book. It cannot be processed.'))
        if raw.startswith(b'\xeaDRMION\xee'):
            raise KFXError()

        self.header   = raw[0:72]
-        self.name     = self.header[:32].replace('\x00', '')
+        self.name     = self.header[:32].replace(b'\x00', b'')
        self.num_sections, = struct.unpack('>H', raw[76:78])

        self.ident = self.header[0x3C:0x3C + 8].upper()
-        if self.ident not in ['BOOKMOBI', 'TEXTREAD']:
+        if self.ident not in [b'BOOKMOBI', b'TEXTREAD']:
            raise MobiError('Unknown book type: %s' % repr(self.ident))

        self.sections = []
@@ -750,7 +750,7 @@ class MobiReader(object):
        def sizeof_trailing_entry(ptr, psize):
            bitpos, result = 0, 0
            while True:
-                v = ord(ptr[psize-1])
+                v = ord(ptr[psize-1:psize])
                result |= (v & 0x7F) << bitpos
                bitpos += 7
                psize -= 1
@@ -769,7 +769,8 @@ class MobiReader(object):
                    return 0
            flags >>= 1
        if self.book_header.extra_flags & 1:
-            num += (ord(data[size - num - 1]) & 0x3) + 1
+            off = size - num - 1
+            num += (ord(data[off:off+1]) & 0x3) + 1
        return num

    def warn_about_trailing_entry_corruption(self):
@@ -791,7 +792,7 @@ class MobiReader(object):

        self.mobi_html = b''

-        if self.book_header.compression_type == 'DH':
+        if self.book_header.compression_type == b'DH':
            huffs = [self.sections[i][0] for i in
                range(self.book_header.huff_offset,
                    self.book_header.huff_offset + self.book_header.huff_number)]
@@ -800,23 +801,23 @@ class MobiReader(object):
            huff = HuffReader(huffs)
            unpack = huff.unpack

-        elif self.book_header.compression_type == '\x00\x02':
+        elif self.book_header.compression_type == b'\x00\x02':
            unpack = decompress_doc

-        elif self.book_header.compression_type == '\x00\x01':
+        elif self.book_header.compression_type == b'\x00\x01':
            unpack = lambda x: x
        else:
-            raise MobiError('Unknown compression algorithm: %s' % repr(self.book_header.compression_type))
+            raise MobiError('Unknown compression algorithm: %r' % self.book_header.compression_type)
        self.mobi_html = b''.join(map(unpack, text_sections))
        if self.mobi_html.endswith(b'#'):
            self.mobi_html = self.mobi_html[:-1]

-        if self.book_header.ancient and '<html' not in self.mobi_html[:300].lower():
-            self.mobi_html = self.mobi_html.replace('\r ', '\n\n ')
-        self.mobi_html = self.mobi_html.replace('\0', '')
+        if self.book_header.ancient and b'<html' not in self.mobi_html[:300].lower():
+            self.mobi_html = self.mobi_html.replace(b'\r ', b'\n\n ')
+        self.mobi_html = self.mobi_html.replace(b'\0', b'')
        if self.book_header.codec == 'cp1252':
-            self.mobi_html = self.mobi_html.replace('\x1e', '')  # record separator
-            self.mobi_html = self.mobi_html.replace('\x02', '')  # start of text
+            self.mobi_html = self.mobi_html.replace(b'\x1e', b'')  # record separator
+            self.mobi_html = self.mobi_html.replace(b'\x02', b'')  # start of text
        return processed_records

    def replace_page_breaks(self):
--- a/src/calibre/ebooks/mobi/reader/mobi8.py
+++ b/src/calibre/ebooks/mobi/reader/mobi8.py
@@ -24,7 +24,7 @@ from calibre.ebooks.metadata.toc import TOC
 from calibre.ebooks.mobi.utils import read_font_record
 from calibre.ebooks.oeb.parse_utils import parse_html
 from calibre.ebooks.oeb.base import XPath, XHTML, xml2text
-from polyglot.builtins import range, zip
+from polyglot.builtins import range, zip, unicode_type
 from polyglot.urllib import urldefrag

 Part = namedtuple('Part',
@@ -85,7 +85,7 @@ class Mobi8Reader(object):

    def __call__(self):
        self.mobi6_reader.check_for_drm()
-        self.aid_anchor_suffix = bytes(uuid4().hex)
+        self.aid_anchor_suffix = uuid4().hex.encode('utf-8')
        bh = self.mobi6_reader.book_header
        if self.mobi6_reader.kf8_type == 'joint':
            offset = self.mobi6_reader.kf8_boundary + 2
@@ -225,8 +225,7 @@ class Mobi8Reader(object):
            self.parts.append(skeleton)
            if divcnt < 1:
                # Empty file
-                import uuid
-                aidtext = str(uuid.uuid4())
+                aidtext = unicode_type(uuid4())
                filename = aidtext + '.html'
            self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
                baseptr, aidtext))