mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: Various MOBI fixes found while reviewing the previous py3 merge
This commit is contained in:
parent
134692af38
commit
b86e9f0f27
@ -28,6 +28,7 @@ class PalmDOCAttributes(object):
|
||||
|
||||
def __str__(self):
|
||||
return '%s: %s'%(self.name, bool(self.val))
|
||||
__unicode__ = __str__
|
||||
|
||||
def __init__(self, raw):
|
||||
self.val = struct.unpack(b'<H', raw)[0]
|
||||
@ -44,6 +45,7 @@ class PalmDOCAttributes(object):
|
||||
def __str__(self):
    # Render every entry of self.attributes as text, then list them one
    # per line (tab-indented) under a header showing self.val in binary.
    rendered = []
    for attribute in self.attributes:
        rendered.append(unicode_type(attribute))
    body = '\n\t'.join(rendered)
    return 'PalmDOC Attributes: %s\n\t%s'%(bin(self.val), body)
|
||||
__unicode__ = __str__
|
||||
|
||||
|
||||
class PalmDB(object):
|
||||
@ -102,6 +104,7 @@ class PalmDB(object):
|
||||
ans.append('Number of records: %s'%self.number_of_records)
|
||||
|
||||
return '\n'.join(ans)
|
||||
__unicode__ = __str__
|
||||
# }}}
|
||||
|
||||
|
||||
@ -257,6 +260,8 @@ class EXTHHeader(object):
|
||||
for r in self.records:
|
||||
ans.append(unicode_type(r))
|
||||
return '\n'.join(ans)
|
||||
__unicode__ = __str__
|
||||
|
||||
# }}}
|
||||
|
||||
|
||||
|
@ -19,7 +19,7 @@ from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
|
||||
from calibre.utils.imghdr import what
|
||||
from calibre.ebooks.mobi.debug import format_bytes
|
||||
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||
from polyglot.builtins import unicode_type, range, iteritems, as_bytes
|
||||
from polyglot.builtins import unicode_type, range, iteritems, as_bytes, print_to_binary_file
|
||||
|
||||
|
||||
class TagX(object): # {{{
|
||||
@ -583,7 +583,7 @@ class TBSIndexing(object): # {{{
|
||||
types[tbs_type] += strings
|
||||
for typ, strings in iteritems(types):
|
||||
with open(os.path.join(bdir, 'tbs_type_%d.txt'%typ), 'wb') as f:
|
||||
f.write('\n'.join(strings))
|
||||
f.write(as_bytes('\n'.join(strings)))
|
||||
|
||||
def dump_record(self, r, dat):
|
||||
ans = []
|
||||
@ -788,14 +788,15 @@ class MOBIFile(object): # {{{
|
||||
self.index_record.indices, self.mobi_header.type_raw)
|
||||
|
||||
def print_header(self, f=sys.stdout):
|
||||
print(unicode_type(self.palmdb).encode('utf-8'), file=f)
|
||||
print(file=f)
|
||||
print('Record headers:', file=f)
|
||||
p = print_to_binary_file(f)
|
||||
p(unicode_type(self.palmdb))
|
||||
p()
|
||||
p('Record headers:')
|
||||
for i, r in enumerate(self.records):
|
||||
print('%6d. %s'%(i, r.header), file=f)
|
||||
p('%6d. %s'%(i, r.header))
|
||||
|
||||
print(file=f)
|
||||
print(unicode_type(self.mobi_header).encode('utf-8'), file=f)
|
||||
p()
|
||||
p(unicode_type(self.mobi_header))
|
||||
# }}}
|
||||
|
||||
|
||||
@ -820,18 +821,20 @@ def inspect_mobi(mobi_file, ddir):
|
||||
if f.index_header is not None:
|
||||
f.index_record.alltext = alltext
|
||||
with open(os.path.join(ddir, 'index.txt'), 'wb') as out:
|
||||
print = print_to_binary_file(out)
|
||||
print(unicode_type(f.index_header), file=out)
|
||||
print('\n\n', file=out)
|
||||
if f.secondary_index_header is not None:
|
||||
print(unicode_type(f.secondary_index_header).encode('utf-8'), file=out)
|
||||
print(unicode_type(f.secondary_index_header), file=out)
|
||||
print('\n\n', file=out)
|
||||
if f.secondary_index_record is not None:
|
||||
print(unicode_type(f.secondary_index_record).encode('utf-8'), file=out)
|
||||
print(unicode_type(f.secondary_index_record), file=out)
|
||||
print('\n\n', file=out)
|
||||
print(unicode_type(f.cncx).encode('utf-8'), file=out)
|
||||
print(unicode_type(f.cncx), file=out)
|
||||
print('\n\n', file=out)
|
||||
print(unicode_type(f.index_record), file=out)
|
||||
with open(os.path.join(ddir, 'tbs_indexing.txt'), 'wb') as out:
|
||||
print = print_to_binary_file(out)
|
||||
print(unicode_type(f.tbs_indexing), file=out)
|
||||
f.tbs_indexing.dump(ddir)
|
||||
|
||||
|
@ -17,7 +17,7 @@ from calibre.ebooks.mobi.utils import read_font_record, decode_tbs, RECORD_SIZE
|
||||
from calibre.ebooks.mobi.debug import format_bytes
|
||||
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||
from calibre.utils.imghdr import what
|
||||
from polyglot.builtins import iteritems, itervalues, map, unicode_type, zip
|
||||
from polyglot.builtins import iteritems, itervalues, map, unicode_type, zip, print_to_binary_file
|
||||
|
||||
|
||||
class FDST(object):
|
||||
@ -94,14 +94,15 @@ class MOBIFile(object):
|
||||
self.read_tbs()
|
||||
|
||||
def print_header(self, f=sys.stdout):
|
||||
print(unicode_type(self.mf.palmdb).encode('utf-8'), file=f)
|
||||
print(file=f)
|
||||
print('Record headers:', file=f)
|
||||
p = print_to_binary_file(f)
|
||||
p(unicode_type(self.mf.palmdb))
|
||||
p()
|
||||
p('Record headers:')
|
||||
for i, r in enumerate(self.mf.records):
|
||||
print('%6d. %s'%(i, r.header), file=f)
|
||||
p('%6d. %s'%(i, r.header))
|
||||
|
||||
print(file=f)
|
||||
print(unicode_type(self.mf.mobi8_header).encode('utf-8'), file=f)
|
||||
p()
|
||||
p(unicode_type(self.mf.mobi8_header))
|
||||
|
||||
def read_fdst(self):
|
||||
self.fdst = None
|
||||
|
@ -75,7 +75,7 @@ class EXTHHeader(object): # {{{
|
||||
# they are messed up in the PDB header
|
||||
try:
|
||||
title = self.decode(content)
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
elif idx == 524: # Lang code
|
||||
try:
|
||||
@ -83,7 +83,7 @@ class EXTHHeader(object): # {{{
|
||||
lang = canonicalize_lang(lang)
|
||||
if lang:
|
||||
self.mi.language = lang
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
elif idx == 525:
|
||||
try:
|
||||
@ -138,8 +138,8 @@ class EXTHHeader(object): # {{{
|
||||
self.mi.tags = list(set(self.mi.tags))
|
||||
elif idx == 106:
|
||||
try:
|
||||
self.mi.pubdate = parse_date(content, as_utc=False)
|
||||
except:
|
||||
self.mi.pubdate = parse_date(self.decode(content), as_utc=False)
|
||||
except Exception:
|
||||
pass
|
||||
elif idx == 108:
|
||||
self.mi.book_producer = clean_xml_chars(self.decode(content).strip())
|
||||
@ -165,7 +165,7 @@ class EXTHHeader(object): # {{{
|
||||
try:
|
||||
self.uuid = content.decode('ascii')
|
||||
self.mi.set_identifier('mobi-asin', self.uuid)
|
||||
except:
|
||||
except Exception:
|
||||
self.uuid = None
|
||||
elif idx == 116:
|
||||
self.start_offset, = struct.unpack(b'>L', content)
|
||||
@ -302,14 +302,14 @@ class MetadataHeader(BookHeader):
|
||||
try:
|
||||
if self.section_data(kf8_header_index-1) == b'BOUNDARY':
|
||||
return 'joint'
|
||||
except:
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
def identity(self):
|
||||
self.stream.seek(60)
|
||||
ident = self.stream.read(8).upper()
|
||||
if ident not in [b'BOOKMOBI', b'TEXTREAD']:
|
||||
if ident not in (b'BOOKMOBI', b'TEXTREAD'):
|
||||
raise MobiError('Unknown book type: %s' % ident)
|
||||
return ident
|
||||
|
||||
|
@ -123,6 +123,9 @@ class CNCX(object): # {{{
|
||||
|
||||
def iteritems(self):
    # Hand self.records to the module-level iteritems() helper and return
    # whatever it produces.
    record_pairs = iteritems(self.records)
    return record_pairs
|
||||
|
||||
def items(self):
    # Same result as calling the module-level iteritems() helper on
    # self.records; exposed under the name items().
    record_pairs = iteritems(self.records)
    return record_pairs
|
||||
# }}}
|
||||
|
||||
|
||||
|
@ -89,7 +89,7 @@ class MobiReader(object):
|
||||
self.num_sections, = struct.unpack('>H', raw[76:78])
|
||||
|
||||
self.ident = self.header[0x3C:0x3C + 8].upper()
|
||||
if self.ident not in [b'BOOKMOBI', b'TEXTREAD']:
|
||||
if self.ident not in (b'BOOKMOBI', b'TEXTREAD'):
|
||||
raise MobiError('Unknown book type: %s' % repr(self.ident))
|
||||
|
||||
self.sections = []
|
||||
|
@ -223,7 +223,7 @@ def get_trailing_data(record, extra_data_flags):
|
||||
if extra_data_flags & 0b1:
|
||||
# Only the first two bits are used for the size since there can
|
||||
# never be more than 3 trailing multibyte chars
|
||||
sz = (ord(record[-1]) & 0b11) + 1
|
||||
sz = (ord(record[-1:]) & 0b11) + 1
|
||||
consumed = 1
|
||||
if sz > consumed:
|
||||
data[0] = record[-sz:-consumed]
|
||||
@ -298,7 +298,7 @@ def decode_tbs(byts, flag_size=4):
|
||||
extra[0b0010] = x
|
||||
consumed += consumed2
|
||||
if flags & 0b0100:
|
||||
extra[0b0100] = ord(byts[0])
|
||||
extra[0b0100] = ord(byts[0:1])
|
||||
byts = byts[1:]
|
||||
consumed += 1
|
||||
if flags & 0b0001:
|
||||
|
@ -182,3 +182,19 @@ else:
|
||||
|
||||
def reload(module):
    # Thin wrapper: delegate to builtins.reload and pass its result back.
    reloaded = builtins.reload(module)
    return reloaded
|
||||
|
||||
|
||||
def print_to_binary_file(fileobj, encoding='utf-8'):
    """Return a ``print()``-like callable that writes bytes to *fileobj*.

    The returned function accepts any number of positional values plus the
    optional keyword arguments ``file`` (overrides *fileobj* for that call),
    ``sep`` (default ``' '``) and ``end`` (default ``'\\n'``). The values,
    the separator and the terminator are all converted with ``as_bytes``
    using *encoding* before being written, so the target object must accept
    bytes in its ``write()`` method.
    """

    # The inner function is deliberately named ``print`` so that callers can
    # rebind the name locally and keep their existing print(...) calls.
    def print(*a, **kw):
        f = kw.get('file', fileobj)
        if a:
            sep = as_bytes(kw.get('sep', ' '), encoding)
            # Join the converted values rather than testing each one against
            # a[-1] by identity: once a value has been converted it is no
            # longer the object stored in ``a``, so the identity test wrote
            # a stray separator after the final value (and could drop a
            # needed separator when the same bytes object was passed twice).
            f.write(sep.join(as_bytes(x, encoding) for x in a))
        # Convert the terminator with the same encoding as everything else.
        f.write(as_bytes(kw.get('end', '\n'), encoding))

    return print
|
||||
|
Loading…
x
Reference in New Issue
Block a user