py3: Various MOBI fixes found while reviewing the previous py3 merge

2025-07-09 03:04:10 -04:00 · 2019-07-07 18:14:13 +05:30 · 2019-07-07 18:14:13 +05:30 · b86e9f0f27
commit b86e9f0f27
parent 134692af38
8 changed files with 56 additions and 28 deletions
--- a/src/calibre/ebooks/mobi/debug/headers.py
+++ b/src/calibre/ebooks/mobi/debug/headers.py
@ -28,6 +28,7 @@ class PalmDOCAttributes(object):

        def __str__(self):
            return '%s: %s'%(self.name, bool(self.val))
+        __unicode__ = __str__

    def __init__(self, raw):
        self.val = struct.unpack(b'<H', raw)[0]
@ -44,6 +45,7 @@ class PalmDOCAttributes(object):
    def __str__(self):
        attrs = '\n\t'.join([unicode_type(x) for x in self.attributes])
        return 'PalmDOC Attributes: %s\n\t%s'%(bin(self.val), attrs)
+    __unicode__ = __str__


 class PalmDB(object):
@ -102,6 +104,7 @@ class PalmDB(object):
        ans.append('Number of records: %s'%self.number_of_records)

        return '\n'.join(ans)
+    __unicode__ = __str__
 # }}}


@ -257,6 +260,8 @@ class EXTHHeader(object):
        for r in self.records:
            ans.append(unicode_type(r))
        return '\n'.join(ans)
+    __unicode__ = __str__
+
 # }}}


--- a/src/calibre/ebooks/mobi/debug/mobi6.py
+++ b/src/calibre/ebooks/mobi/debug/mobi6.py
@ -19,7 +19,7 @@ from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
 from calibre.utils.imghdr import what
 from calibre.ebooks.mobi.debug import format_bytes
 from calibre.ebooks.mobi.debug.headers import TextRecord
-from polyglot.builtins import unicode_type, range, iteritems, as_bytes
+from polyglot.builtins import unicode_type, range, iteritems, as_bytes, print_to_binary_file


 class TagX(object):  # {{{
@ -583,7 +583,7 @@ class TBSIndexing(object):  # {{{
            types[tbs_type] += strings
        for typ, strings in iteritems(types):
            with open(os.path.join(bdir, 'tbs_type_%d.txt'%typ), 'wb') as f:
-                f.write('\n'.join(strings))
+                f.write(as_bytes('\n'.join(strings)))

    def dump_record(self, r, dat):
        ans = []
@ -788,14 +788,15 @@ class MOBIFile(object):  # {{{
                    self.index_record.indices, self.mobi_header.type_raw)

    def print_header(self, f=sys.stdout):
-        print(unicode_type(self.palmdb).encode('utf-8'), file=f)
-        print(file=f)
-        print('Record headers:', file=f)
+        p = print_to_binary_file(f)
+        p(unicode_type(self.palmdb))
+        p()
+        p('Record headers:')
        for i, r in enumerate(self.records):
-            print('%6d. %s'%(i, r.header), file=f)
+            p('%6d. %s'%(i, r.header))

-        print(file=f)
-        print(unicode_type(self.mobi_header).encode('utf-8'), file=f)
+        p()
+        p(unicode_type(self.mobi_header))
 # }}}


@ -820,18 +821,20 @@ def inspect_mobi(mobi_file, ddir):
    if f.index_header is not None:
        f.index_record.alltext = alltext
        with open(os.path.join(ddir, 'index.txt'), 'wb') as out:
+            print = print_to_binary_file(out)
            print(unicode_type(f.index_header), file=out)
            print('\n\n', file=out)
            if f.secondary_index_header is not None:
-                print(unicode_type(f.secondary_index_header).encode('utf-8'), file=out)
+                print(unicode_type(f.secondary_index_header), file=out)
                print('\n\n', file=out)
            if f.secondary_index_record is not None:
-                print(unicode_type(f.secondary_index_record).encode('utf-8'), file=out)
+                print(unicode_type(f.secondary_index_record), file=out)
                print('\n\n', file=out)
-            print(unicode_type(f.cncx).encode('utf-8'), file=out)
+            print(unicode_type(f.cncx), file=out)
            print('\n\n', file=out)
            print(unicode_type(f.index_record), file=out)
        with open(os.path.join(ddir, 'tbs_indexing.txt'), 'wb') as out:
+            print = print_to_binary_file(out)
            print(unicode_type(f.tbs_indexing), file=out)
        f.tbs_indexing.dump(ddir)

--- a/src/calibre/ebooks/mobi/debug/mobi8.py
+++ b/src/calibre/ebooks/mobi/debug/mobi8.py
@ -17,7 +17,7 @@ from calibre.ebooks.mobi.utils import read_font_record, decode_tbs, RECORD_SIZE
 from calibre.ebooks.mobi.debug import format_bytes
 from calibre.ebooks.mobi.reader.headers import NULL_INDEX
 from calibre.utils.imghdr import what
-from polyglot.builtins import iteritems, itervalues, map, unicode_type, zip
+from polyglot.builtins import iteritems, itervalues, map, unicode_type, zip, print_to_binary_file


 class FDST(object):
@ -94,14 +94,15 @@ class MOBIFile(object):
        self.read_tbs()

    def print_header(self, f=sys.stdout):
-        print(unicode_type(self.mf.palmdb).encode('utf-8'), file=f)
-        print(file=f)
-        print('Record headers:', file=f)
+        p = print_to_binary_file(f)
+        p(unicode_type(self.mf.palmdb))
+        p()
+        p('Record headers:')
        for i, r in enumerate(self.mf.records):
-            print('%6d. %s'%(i, r.header), file=f)
+            p('%6d. %s'%(i, r.header))

-        print(file=f)
-        print(unicode_type(self.mf.mobi8_header).encode('utf-8'), file=f)
+        p()
+        p(unicode_type(self.mf.mobi8_header))

    def read_fdst(self):
        self.fdst = None
--- a/src/calibre/ebooks/mobi/reader/headers.py
+++ b/src/calibre/ebooks/mobi/reader/headers.py
@ -75,7 +75,7 @@ class EXTHHeader(object):  # {{{
                # they are messed up in the PDB header
                try:
                    title = self.decode(content)
-                except:
+                except Exception:
                    pass
            elif idx == 524:  # Lang code
                try:
@ -83,7 +83,7 @@ class EXTHHeader(object):  # {{{
                    lang = canonicalize_lang(lang)
                    if lang:
                        self.mi.language = lang
-                except:
+                except Exception:
                    pass
            elif idx == 525:
                try:
@ -138,8 +138,8 @@ class EXTHHeader(object):  # {{{
            self.mi.tags = list(set(self.mi.tags))
        elif idx == 106:
            try:
-                self.mi.pubdate = parse_date(content, as_utc=False)
-            except:
+                self.mi.pubdate = parse_date(self.decode(content), as_utc=False)
+            except Exception:
                pass
        elif idx == 108:
            self.mi.book_producer = clean_xml_chars(self.decode(content).strip())
@ -165,7 +165,7 @@ class EXTHHeader(object):  # {{{
            try:
                self.uuid = content.decode('ascii')
                self.mi.set_identifier('mobi-asin', self.uuid)
-            except:
+            except Exception:
                self.uuid = None
        elif idx == 116:
            self.start_offset, = struct.unpack(b'>L', content)
@ -302,14 +302,14 @@ class MetadataHeader(BookHeader):
        try:
            if self.section_data(kf8_header_index-1) == b'BOUNDARY':
                return 'joint'
-        except:
+        except Exception:
            pass
        return None

    def identity(self):
        self.stream.seek(60)
        ident = self.stream.read(8).upper()
-        if ident not in [b'BOOKMOBI', b'TEXTREAD']:
+        if ident not in (b'BOOKMOBI', b'TEXTREAD'):
            raise MobiError('Unknown book type: %s' % ident)
        return ident

--- a/src/calibre/ebooks/mobi/reader/index.py
+++ b/src/calibre/ebooks/mobi/reader/index.py
@ -123,6 +123,9 @@ class CNCX(object):  # {{{

    def iteritems(self):
        return iteritems(self.records)
+
+    def items(self):
+        return iteritems(self.records)
 # }}}


--- a/src/calibre/ebooks/mobi/reader/mobi6.py
+++ b/src/calibre/ebooks/mobi/reader/mobi6.py
@ -89,7 +89,7 @@ class MobiReader(object):
        self.num_sections, = struct.unpack('>H', raw[76:78])

        self.ident = self.header[0x3C:0x3C + 8].upper()
-        if self.ident not in [b'BOOKMOBI', b'TEXTREAD']:
+        if self.ident not in (b'BOOKMOBI', b'TEXTREAD'):
            raise MobiError('Unknown book type: %s' % repr(self.ident))

        self.sections = []
--- a/src/calibre/ebooks/mobi/utils.py
+++ b/src/calibre/ebooks/mobi/utils.py
@ -223,7 +223,7 @@ def get_trailing_data(record, extra_data_flags):
    if extra_data_flags & 0b1:
        # Only the first two bits are used for the size since there can
        # never be more than 3 trailing multibyte chars
-        sz = (ord(record[-1]) & 0b11) + 1
+        sz = (ord(record[-1:]) & 0b11) + 1
        consumed = 1
        if sz > consumed:
            data[0] = record[-sz:-consumed]
@ -298,7 +298,7 @@ def decode_tbs(byts, flag_size=4):
        extra[0b0010] = x
        consumed += consumed2
    if flags & 0b0100:
-        extra[0b0100] = ord(byts[0])
+        extra[0b0100] = ord(byts[0:1])
        byts = byts[1:]
        consumed += 1
    if flags & 0b0001:
--- a/src/polyglot/builtins.py
+++ b/src/polyglot/builtins.py
@ -182,3 +182,19 @@ else:

    def reload(module):
        return builtins.reload(module)
+
+
+def print_to_binary_file(fileobj, encoding='utf-8'):
+    ''' Return a print() work-alike that encodes its arguments with
+    `encoding` and writes the bytes to fileobj (or the file= keyword). '''

+    def print(*a, **kw):
+        f = kw.get('file', fileobj)
+        sep = as_bytes(kw.get('sep', ' '), encoding)
+        # Join on sep so it lands only between arguments; an identity test
+        # against a[-1] misfires once x has been re-bound to encoded bytes.
+        f.write(sep.join(as_bytes(x, encoding) for x in a))
+        # The terminator must use the caller's encoding too, not the default.
+        f.write(as_bytes(kw.get('end', '\n'), encoding))

+    return print