diff --git a/src/calibre/ebooks/conversion/plugins/htmlz_output.py b/src/calibre/ebooks/conversion/plugins/htmlz_output.py index 484dffa09e..4f507ea83d 100644 --- a/src/calibre/ebooks/conversion/plugins/htmlz_output.py +++ b/src/calibre/ebooks/conversion/plugins/htmlz_output.py @@ -126,7 +126,7 @@ class HTMLZOutput(OutputFormatPlugin): # Metadata with open(os.path.join(tdir, u'metadata.opf'), 'wb') as mdataf: - opf = OPF(io.BytesIO(etree.tostring(oeb_book.metadata.to_opf1()))) + opf = OPF(io.BytesIO(etree.tostring(oeb_book.metadata.to_opf1(), encoding='UTF-8'))) mi = opf.to_book_metadata() if cover_path: mi.cover = u'cover.jpg' diff --git a/src/calibre/ebooks/lit/writer.py b/src/calibre/ebooks/lit/writer.py index 0cadb1ac99..b5256101b8 100644 --- a/src/calibre/ebooks/lit/writer.py +++ b/src/calibre/ebooks/lit/writer.py @@ -286,6 +286,8 @@ class ReBinary(object): data.write(codepoint_to_chr(len(self.anchors)).encode('utf-8')) for anchor, offset in self.anchors: data.write(codepoint_to_chr(len(anchor)).encode('utf-8')) + if isinstance(anchor, unicode_type): + anchor = anchor.encode('utf-8') data.write(anchor) data.write(pack('H', data[76:78]) @@ -245,9 +245,9 @@ class MetadataUpdater(object): if not exth: # Construct an empty EXTH block - pad = '\0' * 4 - exth = ['EXTH', pack('>II', 12, 0), pad] - exth = ''.join(exth) + pad = b'\0' * 4 + exth = [b'EXTH', pack('>II', 12, 0), pad] + exth = b''.join(exth) # Update drm_offset(0xa8), title_offset(0x54) if self.encryption_type != 0: @@ -270,9 +270,9 @@ class MetadataUpdater(object): # Pad to a 4-byte boundary trail = len(new_record0.getvalue()) % 4 - pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte + pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte new_record0.write(pad) - new_record0.write('\0'*(1024*8)) + new_record0.write(b'\0'*(1024*8)) # Rebuild the stream, update the pdbrecords pointers self.patchSection(0,new_record0.getvalue()) @@ -334,7 +334,7 @@ class MetadataUpdater(object): if rec[0] in self.original_exth_records: self.original_exth_records.pop(rec[0]) - if self.type != "BOOKMOBI": + if self.type != b"BOOKMOBI": raise MobiError("Setting metadata only supported for MOBI files of type 'BOOK'.\n" "\tThis is a %r file of type %r" % (self.type[0:4], self.type[4:8])) @@ -382,9 +382,9 @@ class MetadataUpdater(object): update_exth_record((501, b'PDOC')) if mi.pubdate: - update_exth_record((106, str(mi.pubdate).encode(self.codec, 'replace'))) + update_exth_record((106, unicode_type(mi.pubdate).encode(self.codec, 'replace'))) elif mi.timestamp: - update_exth_record((106, str(mi.timestamp).encode(self.codec, 'replace'))) + update_exth_record((106, unicode_type(mi.timestamp).encode(self.codec, 'replace'))) elif self.timestamp: update_exth_record((106, self.timestamp)) else: @@ -429,9 +429,9 @@ class MetadataUpdater(object): exth.write(data) exth = exth.getvalue() trail = len(exth) % 4 - pad = '\0' * (4 - trail) # Always pad w/ at least 1 byte - exth = ['EXTH', pack('>II', len(exth) + 12, len(recs)), exth, pad] - exth = ''.join(exth) + pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte + exth = [b'EXTH', pack('>II', len(exth) + 12, len(recs)), exth, pad] + exth = b''.join(exth) if getattr(self, 'exth', None) is None: raise MobiError('No existing EXTH record. Cannot update metadata.') @@ -481,8 +481,8 @@ def get_metadata(stream): stream.seek(0) try: raw = stream.read(3) - except: - raw = '' + except Exception: + raw = b'' stream.seek(0) if raw == b'TPZ': from calibre.ebooks.metadata.topaz import get_metadata @@ -521,8 +521,8 @@ def get_metadata(stream): else: try: data = mh.section_data(mh.first_image_index) - except: - data = '' + except Exception: + data = b'' if data and what(None, data) in {'jpg', 'jpeg', 'gif', 'png', 'bmp', 'webp'}: try: mi.cover_data = ('jpg', save_cover_data_to(data)) diff --git a/src/calibre/ebooks/metadata/topaz.py b/src/calibre/ebooks/metadata/topaz.py index a51822270f..12bb199feb 100644 --- a/src/calibre/ebooks/metadata/topaz.py +++ b/src/calibre/ebooks/metadata/topaz.py @@ -12,6 +12,14 @@ from calibre.ebooks.metadata import MetaInformation from calibre import force_unicode +class StringIO(io.StringIO): + + def write(self, x): + if isinstance(x, bytes): + x = x.decode('iso-8859-1') + return io.StringIO.write(self, x) + + class StreamSlicer(object): def __init__(self, stream, start=0, stop=None): @@ -38,7 +46,7 @@ class StreamSlicer(object): start, stop = stop, start size = stop - start if size <= 0: - return "" + return b"" stream.seek(base + start) data = stream.read(size) if stride != 1: @@ -87,7 +95,7 @@ class MetadataUpdater(object): self.data = StreamSlicer(stream) sig = self.data[:4] - if not sig.startswith('TPZ'): + if not sig.startswith(b'TPZ'): raise ValueError("'%s': Not a Topaz file" % getattr(stream, 'name', 'Unnamed stream')) offset = 4 @@ -102,7 +110,7 @@ class MetadataUpdater(object): # Second integrity test - metadata body md_offset = self.topaz_headers['metadata']['blocks'][0]['offset'] md_offset += self.base - if self.data[md_offset+1:md_offset+9] != 'metadata': + if self.data[md_offset+1:md_offset+9] != b'metadata': raise ValueError("'%s': Damaged metadata record" % getattr(stream, 'name', 'Unnamed stream')) def book_length(self): @@ -116,8 +124,9 @@ class MetadataUpdater(object): def decode_vwi(self,bytes): pos, val = 0, 0 done = False + byts = bytearray(bytes) while pos < len(bytes) and not done: - b = ord(bytes[pos]) + b = byts[pos] pos += 1 if (b & 0x80) == 0: done = True @@ -194,12 +203,12 @@ class MetadataUpdater(object): else: return None dkey = self.topaz_headers[x] - dks = io.StringIO() + dks = StringIO() dks.write(self.encode_vwi(len(dkey['tag']))) offset += 1 dks.write(dkey['tag']) offset += len('dkey') - dks.write(chr(0)) + dks.write(u'\0') offset += 1 dks.write(self.data[offset:offset + len_uncomp].decode('iso-8859-1')) return dks.getvalue().encode('iso-8859-1') @@ -233,7 +242,7 @@ class MetadataUpdater(object): return topaz_headers, th_seq def generate_metadata_stream(self): - ms = StringIO.StringIO() + ms = StringIO() ms.write(self.encode_vwi(len(self.md_header['tag'])).encode('iso-8859-1')) ms.write(self.md_header['tag']) ms.write(chr(self.md_header['flags'])) diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 6d61d8ff1e..2bc19bfcdc 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -282,7 +282,7 @@ class MobiWriter(object): # 0x4c - 0x4f : Language specifier record0.write(iana2mobi( - str(metadata.language[0]))) + unicode_type(metadata.language[0]))) # 0x50 - 0x57 : Input language and Output language record0.write(b'\0' * 8) diff --git a/src/calibre/utils/zipfile.py b/src/calibre/utils/zipfile.py index 0ff4727de6..3b9a9ef7f7 100644 --- a/src/calibre/utils/zipfile.py +++ b/src/calibre/utils/zipfile.py @@ -57,7 +57,7 @@ ZIP_DEFLATED = 8 # The "end of central directory" structure, magic number, size, and indices # (section V.I in the format document) structEndArchive = "<4s4H2LH" -stringEndArchive = "PK\005\006" +stringEndArchive = b"PK\005\006" sizeEndCentDir = struct.calcsize(structEndArchive) _ECD_SIGNATURE = 0 @@ -76,7 +76,7 @@ _ECD_LOCATION = 9 # The "central directory" structure, magic number, size, and indices # of entries in the structure (section V.F in the format document) structCentralDir = "<4s4B4HL2L5H2L" -stringCentralDir = "PK\001\002" +stringCentralDir = b"PK\001\002" sizeCentralDir = struct.calcsize(structCentralDir) # indexes of entries in the central directory structure @@ -103,7 +103,7 @@ _CD_LOCAL_HEADER_OFFSET = 18 # The "local file header" structure, magic number, size, and indices # (section V.A in the format document) structFileHeader = "<4s2B4HL2L2H" -stringFileHeader = "PK\003\004" +stringFileHeader = b"PK\003\004" sizeFileHeader = struct.calcsize(structFileHeader) _FH_SIGNATURE = 0 @@ -121,13 +121,13 @@ _FH_EXTRA_FIELD_LENGTH = 11 # The "Zip64 end of central directory locator" structure, magic number, and size structEndArchive64Locator = "<4sLQL" -stringEndArchive64Locator = "PK\x06\x07" +stringEndArchive64Locator = b"PK\x06\x07" sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator) # The "Zip64 end of central directory" record, magic number, size, and indices # (section V.G in the format document) structEndArchive64 = "<4sQ2H2L4Q" -stringEndArchive64 = "PK\x06\x06" +stringEndArchive64 = b"PK\x06\x06" sizeEndCentDir64 = struct.calcsize(structEndArchive64) _CD64_SIGNATURE = 0