From 549b822f9c7efee1fbc83d4cc88f8071457c2eca Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 10 Jun 2019 15:39:55 +0530 Subject: [PATCH] py3: More unicode fixes --- bypy/sources.json | 48 ++++++--------------- bypy/windows.conf | 5 ++- src/calibre/ebooks/pdb/__init__.py | 1 + src/calibre/ebooks/pdb/ereader/reader.py | 1 + src/calibre/ebooks/pdb/ereader/reader132.py | 9 ++-- src/calibre/ebooks/pdb/formatwriter.py | 1 + src/calibre/ebooks/pdb/haodoo/reader.py | 33 +++++++------- src/calibre/ebooks/pdb/header.py | 3 ++ src/calibre/ebooks/pdb/palmdoc/reader.py | 3 +- src/calibre/ebooks/pdb/palmdoc/writer.py | 2 +- src/calibre/ebooks/pdb/pdf/reader.py | 1 + src/calibre/ebooks/pdb/ztxt/__init__.py | 2 +- src/calibre/ebooks/pdb/ztxt/reader.py | 1 + src/calibre/ebooks/pdb/ztxt/writer.py | 4 +- 14 files changed, 54 insertions(+), 60 deletions(-) diff --git a/bypy/sources.json b/bypy/sources.json index ccf3aee8ec..e8b8026fe7 100644 --- a/bypy/sources.json +++ b/bypy/sources.json @@ -1,7 +1,7 @@ [ { "name": "nasm", - "os": "macos", + "os": "macos,windows", "unix": { "filename": "nasm-2.14.02.tar.xz", "hash": "sha256:e24ade3e928f7253aa8c14aa44726d1edf3f98643f87c9d72ec1df44b26be8f5", @@ -153,34 +153,18 @@ "urls": ["https://www.python.org/ftp/python/2.7.16/{filename}"] }, "windows": { - "filename":"python-1948b35e654e0b69ac93e31e3c3405172a6a1b91.tar.gz", - "hash":"sha1:1948b35e654e0b69ac93e31e3c3405172a6a1b91", + "filename":"python-ca3c62c123b8c81a39dfaa785dfa77a0e40b3604ca4bdaceb65df10e32e29b82.tar.gz", + "hash":"sha256:ca3c62c123b8c81a39dfaa785dfa77a0e40b3604ca4bdaceb65df10e32e29b82", "urls":["github:kovidgoyal/cpython"] } }, - { - "name": "pywin32", - "os": "windows", - "python": 2, - "windows": { - "filename":"pywin32-dd5760063f88a300403c74f3e81f3437b8396d8f.tar.gz", - "hash":"sha1:dd5760063f88a300403c74f3e81f3437b8396d8f", - "urls":["github:kovidgoyal/pywin32"] - } - }, - { "name": "icu", "unix": { "filename": "icu4c-64_2-src.tgz", "hash": 
"sha256:627d5d8478e6d96fc8c90fed4851239079a561a6a8b9e48b0892f24e82d31d6c", "urls": ["http://download.icu-project.org/files/icu4c/64.2/{filename}"] - }, - "windows": { - "filename": "icu4c-64_2-src.zip", - "hash": "sha1:aaa014177845c16deba888450c0c34e8bd57e736", - "urls": ["http://download.icu-project.org/files/icu4c/64.2/{filename}"] } }, @@ -500,6 +484,17 @@ } }, + { + "name": "pywin32", + "os": "windows", + "python": 2, + "windows": { + "filename":"pywin32-85c57822da3f8922b8d15a3d1eb7d3c954015a908dd2b663001408ef4c52b74a.tar.gz", + "hash":"sha256:85c57822da3f8922b8d15a3d1eb7d3c954015a908dd2b663001408ef4c52b74a", + "urls":["github:kovidgoyal/pywin32"] + } + }, + { "name": "unrardll", "unix": { @@ -767,11 +762,6 @@ "filename": "sip-4.19.16.tar.gz", "hash": "sha256:184c790d58e9527fc6bdac2bbf8638f3d1b41dea922cad8eb83172b4ba70c620", "urls": ["https://www.riverbankcomputing.com/static/Downloads/sip/4.19.16/{filename}"] - }, - "windows": { - "filename": "sip-4.19.16.zip", - "hash": "sha1:2c0844ea0304d11343168ee25a9ff11df1646c76", - "urls": ["https://www.riverbankcomputing.com/static/Downloads/sip/4.19.16/{filename}"] } }, @@ -781,11 +771,6 @@ "filename": "PyQt5_gpl-5.12.1.tar.gz", "hash": "sha256:3718ce847d824090fd5f95ff3f13847ee75c2507368d4cbaeb48338f506e59bf", "urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQt5/5.12.1/{filename}"] - }, - "windows": { - "filename": "PyQt5_gpl-5.12.1.zip", - "hash": "sha1:adc7cd647b23c438788f62e1e804ca2ee0ad1529", - "urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQt5/5.12.1/{filename}"] } }, @@ -795,11 +780,6 @@ "filename": "PyQtWebEngine_gpl-5.12.1.tar.gz", "hash": "sha256:860704672ea1b616e1347be1f347bc1c749e64ed378370863fe209e84e9bd473", "urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQtWebEngine/5.12.1/{filename}"] - }, - "windows": { - "filename": "PyQtWebEngine_gpl-5.12.1.zip", - "hash": "sha1:adc7cd647b23c438788f62e1e804ca2ee0ad1529", - "urls": 
["https://www.riverbankcomputing.com/static/Downloads/PyQtWebEngine/5.12.1/{filename}"] } }, diff --git a/bypy/windows.conf b/bypy/windows.conf index 4c78912984..389e65f864 100644 --- a/bypy/windows.conf +++ b/bypy/windows.conf @@ -1,6 +1,9 @@ -# Requires installation of Visual Studio 2017 Community Edition and Python 3.7 +# Requires installation of Visual Studio 2017 Community Edition, Git, Python 3.7 and Perl +# git.exe must be in PATH +# Install certifi in python 3 with: # python.exe -m pip install certifi vm_name 'calibre-windows-build' root 'C:/r' python 'C:/py/python.exe' +perl 'C:/Strawberry/perl/bin/perl.exe' diff --git a/src/calibre/ebooks/pdb/__init__.py b/src/calibre/ebooks/pdb/__init__.py index c950f866a5..599744fe06 100644 --- a/src/calibre/ebooks/pdb/__init__.py +++ b/src/calibre/ebooks/pdb/__init__.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py index 65af647a13..bd2bb976f5 100644 --- a/src/calibre/ebooks/pdb/ereader/reader.py +++ b/src/calibre/ebooks/pdb/ereader/reader.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals ''' Read content from ereader pdb file. diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py index e650da2806..b98c5e8356 100644 --- a/src/calibre/ebooks/pdb/ereader/reader132.py +++ b/src/calibre/ebooks/pdb/ereader/reader132.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals ''' Read content from ereader pdb file with a 132 byte header created by Dropbook. 
@@ -87,9 +88,9 @@ class Reader132(FormatReader): def get_image(self, number): if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1: - return 'empty', '' + return 'empty', b'' data = self.section_data(number) - name = data[4:4 + 32].strip('\x00') + name = data[4:4 + 32].strip(b'\x00').decode(self.encoding or 'cp1252') img = data[62:] return name, img @@ -116,9 +117,9 @@ class Reader132(FormatReader): title = self.mi.title if not isinstance(title, unicode_type): title = title.decode('utf-8', 'replace') - html = u'%s' % title + html = '%s' % title - pml = u'' + pml = '' for i in range(1, self.header_record.num_text_pages + 1): self.log.debug('Extracting text page %i' % i) pml += self.get_text_page(i) diff --git a/src/calibre/ebooks/pdb/formatwriter.py b/src/calibre/ebooks/pdb/formatwriter.py index 556ceace12..11e80c8757 100644 --- a/src/calibre/ebooks/pdb/formatwriter.py +++ b/src/calibre/ebooks/pdb/formatwriter.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals ''' Interface defining the necessary public functions for a pdb format writer. diff --git a/src/calibre/ebooks/pdb/haodoo/reader.py b/src/calibre/ebooks/pdb/haodoo/reader.py index 1be279c882..dc4497fa13 100644 --- a/src/calibre/ebooks/pdb/haodoo/reader.py +++ b/src/calibre/ebooks/pdb/haodoo/reader.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals ''' Read content from Haodoo.net pdb file. 
@@ -64,7 +65,7 @@ class LegacyHeaderRecord(object): self.title = fix_punct(fields[0].decode('cp950', 'replace')) self.num_records = int(fields[1]) self.chapter_titles = list(map( - lambda x: fix_punct(x.decode('cp950', 'replace').rstrip(b'\x00')), + lambda x: fix_punct(x.decode('cp950', 'replace').rstrip('\x00')), fields[2:])) @@ -76,7 +77,7 @@ class UnicodeHeaderRecord(object): self.title = fix_punct(fields[0].decode('utf_16_le', 'ignore')) self.num_records = int(fields[1]) self.chapter_titles = list(map( - lambda x: fix_punct(x.decode('utf_16_le', 'replace').rstrip(b'\x00')), + lambda x: fix_punct(x.decode('utf_16_le', 'replace').rstrip('\x00')), fields[2].split(b'\r\x00\n\x00'))) @@ -99,18 +100,18 @@ class Reader(FormatReader): def author(self): self.stream.seek(35) - version = struct.unpack(b'>b', self.stream.read(1))[0] + version = struct.unpack('>b', self.stream.read(1))[0] if version == 2: self.stream.seek(0) author = self.stream.read(35).rstrip(b'\x00').decode(self.encoding, 'replace') return author else: - return u'Unknown' + return 'Unknown' def get_metadata(self): mi = MetaInformation(self.header_record.title, [self.author()]) - mi.language = u'zh-tw' + mi.language = 'zh-tw' return mi @@ -119,10 +120,10 @@ class Reader(FormatReader): def decompress_text(self, number): return self.section_data(number).decode(self.encoding, - 'replace').rstrip(b'\x00') + 'replace').rstrip('\x00') def extract_content(self, output_dir): - txt = u'' + txt = '' self.log.info(u'Decompressing text...') for i in range(1, self.header_record.num_records + 1): @@ -134,23 +135,23 @@ class Reader(FormatReader): line = fix_punct(line) line = line.strip() if not title_added and title in line: - line = u'

' + line + u'

\n' + line = '

' + line + '

\n' title_added = True else: line = prepare_string_for_xml(line) - lines.append(u'

%s

' % line) + lines.append('

%s

' % line) if not title_added: - lines.insert(0, u'

' + title + u'

\n') - txt += u'\n'.join(lines) + lines.insert(0, '

' + title + '

\n') + txt += '\n'.join(lines) self.log.info(u'Converting text to OEB...') html = HTML_TEMPLATE % (self.header_record.title, txt) - with open(os.path.join(output_dir, u'index.html'), 'wb') as index: + with open(os.path.join(output_dir, 'index.html'), 'wb') as index: index.write(html.encode('utf-8')) mi = self.get_metadata() - manifest = [(u'index.html', None)] - spine = [u'index.html'] - opf_writer(output_dir, u'metadata.opf', manifest, spine, mi) + manifest = [('index.html', None)] + spine = ['index.html'] + opf_writer(output_dir, 'metadata.opf', manifest, spine, mi) - return os.path.join(output_dir, u'metadata.opf') + return os.path.join(output_dir, 'metadata.opf') diff --git a/src/calibre/ebooks/pdb/header.py b/src/calibre/ebooks/pdb/header.py index 80905cdb1f..afe7fda08a 100644 --- a/src/calibre/ebooks/pdb/header.py +++ b/src/calibre/ebooks/pdb/header.py @@ -1,4 +1,7 @@ # -*- coding: utf-8 -*- + +from __future__ import absolute_import, division, print_function, unicode_literals + ''' Read the header data from a pdb file. ''' diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py index 6c41588246..8f84dc7782 100644 --- a/src/calibre/ebooks/pdb/palmdoc/reader.py +++ b/src/calibre/ebooks/pdb/palmdoc/reader.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals ''' Read content from palmdoc pdb file. 
@@ -49,7 +50,7 @@ class Reader(FormatReader): if self.header_record.compression == 2 or self.header_record.compression == 258: from calibre.ebooks.compression.palmdoc import decompress_doc return decompress_doc(self.section_data(number)) - return '' + return b'' def extract_content(self, output_dir): raw_txt = b'' diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py index 13d69b451f..272e991b41 100644 --- a/src/calibre/ebooks/pdb/palmdoc/writer.py +++ b/src/calibre/ebooks/pdb/palmdoc/writer.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from __future__ import division +from __future__ import absolute_import, division, print_function, unicode_literals ''' Writer content to palmdoc pdb file. diff --git a/src/calibre/ebooks/pdb/pdf/reader.py b/src/calibre/ebooks/pdb/pdf/reader.py index 4f166397c4..df0af2cfbd 100644 --- a/src/calibre/ebooks/pdb/pdf/reader.py +++ b/src/calibre/ebooks/pdb/pdf/reader.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals ''' Read content from palmdoc pdb file. diff --git a/src/calibre/ebooks/pdb/ztxt/__init__.py b/src/calibre/ebooks/pdb/ztxt/__init__.py index 4dd1a954b0..18d4da97be 100644 --- a/src/calibre/ebooks/pdb/ztxt/__init__.py +++ b/src/calibre/ebooks/pdb/ztxt/__init__.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' @@ -7,4 +8,3 @@ __docformat__ = 'restructuredtext en' class zTXTError(Exception): pass - diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py index 8bfda3f59a..a1c36cf808 100644 --- a/src/calibre/ebooks/pdb/ztxt/reader.py +++ b/src/calibre/ebooks/pdb/ztxt/reader.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals ''' Read content from ztxt pdb file. 
diff --git a/src/calibre/ebooks/pdb/ztxt/writer.py b/src/calibre/ebooks/pdb/ztxt/writer.py index 5545349545..05805c4253 100644 --- a/src/calibre/ebooks/pdb/ztxt/writer.py +++ b/src/calibre/ebooks/pdb/ztxt/writer.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +from __future__ import absolute_import, division, print_function, unicode_literals ''' Writer content to ztxt pdb file. @@ -67,7 +68,7 @@ class Writer(FormatWriter): return txt_records, txt_length def _header_record(self, txt_length, record_count, crc32): - record = '' + record = b'' record += struct.pack('>H', 0x012c) # [0:2], version. 0x012c = 1.44 record += struct.pack('>H', record_count) # [2:4], Number of PDB records used for the text of the book. @@ -83,4 +84,3 @@ class Writer(FormatWriter): record += struct.pack('>LL', 0, 0) # [24:32], padding return record -