py3: More unicode fixes

2025-07-09 03:04:10 -04:00 · 2019-06-10 15:39:55 +05:30 · 2019-06-10 15:39:55 +05:30 · 549b822f9c
commit 549b822f9c
parent 2ecd6dc1b1
14 changed files with 54 additions and 60 deletions
--- a/bypy/sources.json
+++ b/bypy/sources.json
@ -1,7 +1,7 @@
 [
    {
        "name": "nasm",
-        "os": "macos",
+        "os": "macos,windows",
        "unix": {
            "filename": "nasm-2.14.02.tar.xz",
            "hash": "sha256:e24ade3e928f7253aa8c14aa44726d1edf3f98643f87c9d72ec1df44b26be8f5",
@ -153,34 +153,18 @@
            "urls": ["https://www.python.org/ftp/python/2.7.16/{filename}"]
        },
        "windows": {
-            "filename":"python-1948b35e654e0b69ac93e31e3c3405172a6a1b91.tar.gz",
-            "hash":"sha1:1948b35e654e0b69ac93e31e3c3405172a6a1b91",
+            "filename":"python-ca3c62c123b8c81a39dfaa785dfa77a0e40b3604ca4bdaceb65df10e32e29b82.tar.gz",
+            "hash":"sha256:ca3c62c123b8c81a39dfaa785dfa77a0e40b3604ca4bdaceb65df10e32e29b82",
            "urls":["github:kovidgoyal/cpython"]
        }
    },

-    {
-        "name": "pywin32",
-        "os": "windows",
-        "python": 2,
-        "windows": {
-            "filename":"pywin32-dd5760063f88a300403c74f3e81f3437b8396d8f.tar.gz",
-            "hash":"sha1:dd5760063f88a300403c74f3e81f3437b8396d8f",
-            "urls":["github:kovidgoyal/pywin32"]
-        }
-    },
-
    {
        "name": "icu",
        "unix": {
            "filename": "icu4c-64_2-src.tgz",
            "hash": "sha256:627d5d8478e6d96fc8c90fed4851239079a561a6a8b9e48b0892f24e82d31d6c",
            "urls": ["http://download.icu-project.org/files/icu4c/64.2/{filename}"]
-        },
-        "windows": {
-            "filename": "icu4c-64_2-src.zip",
-            "hash": "sha1:aaa014177845c16deba888450c0c34e8bd57e736",
-            "urls": ["http://download.icu-project.org/files/icu4c/64.2/{filename}"]
        }
    },

@ -500,6 +484,17 @@
        }
    },

+    {
+        "name": "pywin32",
+        "os": "windows",
+        "python": 2,
+        "windows": {
+            "filename":"pywin32-85c57822da3f8922b8d15a3d1eb7d3c954015a908dd2b663001408ef4c52b74a.tar.gz",
+            "hash":"sha256:85c57822da3f8922b8d15a3d1eb7d3c954015a908dd2b663001408ef4c52b74a",
+            "urls":["github:kovidgoyal/pywin32"]
+        }
+    },
+
    {
        "name": "unrardll",
        "unix": {
@ -767,11 +762,6 @@
            "filename": "sip-4.19.16.tar.gz",
            "hash": "sha256:184c790d58e9527fc6bdac2bbf8638f3d1b41dea922cad8eb83172b4ba70c620",
            "urls": ["https://www.riverbankcomputing.com/static/Downloads/sip/4.19.16/{filename}"]
-        },
-        "windows": {
-            "filename": "sip-4.19.16.zip",
-            "hash": "sha1:2c0844ea0304d11343168ee25a9ff11df1646c76",
-            "urls": ["https://www.riverbankcomputing.com/static/Downloads/sip/4.19.16/{filename}"]
        }
    },

@ -781,11 +771,6 @@
            "filename": "PyQt5_gpl-5.12.1.tar.gz",
            "hash": "sha256:3718ce847d824090fd5f95ff3f13847ee75c2507368d4cbaeb48338f506e59bf",
            "urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQt5/5.12.1/{filename}"]
-        },
-        "windows": {
-            "filename": "PyQt5_gpl-5.12.1.zip",
-            "hash": "sha1:adc7cd647b23c438788f62e1e804ca2ee0ad1529",
-            "urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQt5/5.12.1/{filename}"]
        }
    },

@ -795,11 +780,6 @@
            "filename": "PyQtWebEngine_gpl-5.12.1.tar.gz",
            "hash": "sha256:860704672ea1b616e1347be1f347bc1c749e64ed378370863fe209e84e9bd473",
            "urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQtWebEngine/5.12.1/{filename}"]
-        },
-        "windows": {
-            "filename": "PyQtWebEngine_gpl-5.12.1.zip",
-            "hash": "sha1:adc7cd647b23c438788f62e1e804ca2ee0ad1529",
-            "urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQtWebEngine/5.12.1/{filename}"]
        }
    },

--- a/bypy/windows.conf
+++ b/bypy/windows.conf
@ -1,6 +1,9 @@
-# Requires installation of Visual Studio 2017 Community Edition and Python 3.7
+# Requires installation of Visual Studio 2017 Community Edition, Git, Python 3.7 and Perl 
+# git.exe must be in PATH
+# Install certifi in python 3 with:
 # python.exe -m pip install certifi

 vm_name 'calibre-windows-build'
 root 'C:/r'
 python 'C:/py/python.exe'
+perl 'C:/Strawberry/perl/bin/perl.exe'
--- a/src/calibre/ebooks/pdb/init.py
+++ b/src/calibre/ebooks/pdb/init.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals

 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
--- a/src/calibre/ebooks/pdb/ereader/reader.py
+++ b/src/calibre/ebooks/pdb/ereader/reader.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals

 '''
 Read content from ereader pdb file.
--- a/src/calibre/ebooks/pdb/ereader/reader132.py
+++ b/src/calibre/ebooks/pdb/ereader/reader132.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals

 '''
 Read content from ereader pdb file with a 132 byte header created by Dropbook.
@ -87,9 +88,9 @@ class Reader132(FormatReader):

    def get_image(self, number):
        if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
-            return 'empty', ''
+            return 'empty', b''
        data = self.section_data(number)
-        name = data[4:4 + 32].strip('\x00')
+        name = data[4:4 + 32].strip(b'\x00').decode(self.encoding or 'cp1252')
        img = data[62:]
        return name, img

@ -116,9 +117,9 @@ class Reader132(FormatReader):
        title = self.mi.title
        if not isinstance(title, unicode_type):
            title = title.decode('utf-8', 'replace')
-        html = u'<html><head><title>%s</title></head><body>' % title
+        html = '<html><head><title>%s</title></head><body>' % title

-        pml = u''
+        pml = ''
        for i in range(1, self.header_record.num_text_pages + 1):
            self.log.debug('Extracting text page %i' % i)
            pml += self.get_text_page(i)
--- a/src/calibre/ebooks/pdb/formatwriter.py
+++ b/src/calibre/ebooks/pdb/formatwriter.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals

 '''
 Interface defining the necessary public functions for a pdb format writer.
--- a/src/calibre/ebooks/pdb/haodoo/reader.py
+++ b/src/calibre/ebooks/pdb/haodoo/reader.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals

 '''
 Read content from Haodoo.net pdb file.
@ -64,7 +65,7 @@ class LegacyHeaderRecord(object):
        self.title = fix_punct(fields[0].decode('cp950', 'replace'))
        self.num_records = int(fields[1])
        self.chapter_titles = list(map(
-            lambda x: fix_punct(x.decode('cp950', 'replace').rstrip(b'\x00')),
+            lambda x: fix_punct(x.decode('cp950', 'replace').rstrip('\x00')),
            fields[2:]))


@ -76,7 +77,7 @@ class UnicodeHeaderRecord(object):
        self.title = fix_punct(fields[0].decode('utf_16_le', 'ignore'))
        self.num_records = int(fields[1])
        self.chapter_titles = list(map(
-            lambda x: fix_punct(x.decode('utf_16_le', 'replace').rstrip(b'\x00')),
+            lambda x: fix_punct(x.decode('utf_16_le', 'replace').rstrip('\x00')),
            fields[2].split(b'\r\x00\n\x00')))


@ -99,18 +100,18 @@ class Reader(FormatReader):

    def author(self):
        self.stream.seek(35)
-        version = struct.unpack(b'>b', self.stream.read(1))[0]
+        version = struct.unpack('>b', self.stream.read(1))[0]
        if version == 2:
            self.stream.seek(0)
            author = self.stream.read(35).rstrip(b'\x00').decode(self.encoding, 'replace')
            return author
        else:
-            return u'Unknown'
+            return 'Unknown'

    def get_metadata(self):
        mi = MetaInformation(self.header_record.title,
                             [self.author()])
-        mi.language = u'zh-tw'
+        mi.language = 'zh-tw'

        return mi

@ -119,10 +120,10 @@ class Reader(FormatReader):

    def decompress_text(self, number):
        return self.section_data(number).decode(self.encoding,
-                'replace').rstrip(b'\x00')
+                'replace').rstrip('\x00')

    def extract_content(self, output_dir):
-        txt = u''
+        txt = ''

        self.log.info(u'Decompressing text...')
        for i in range(1, self.header_record.num_records + 1):
@ -134,23 +135,23 @@ class Reader(FormatReader):
                line = fix_punct(line)
                line = line.strip()
                if not title_added and title in line:
-                    line = u'<h1 class="chapter">' + line + u'</h1>\n'
+                    line = '<h1 class="chapter">' + line + '</h1>\n'
                    title_added = True
                else:
                    line = prepare_string_for_xml(line)
-                lines.append(u'<p>%s</p>' % line)
+                lines.append('<p>%s</p>' % line)
            if not title_added:
-                lines.insert(0, u'<h1 class="chapter">' + title + u'</h1>\n')
-            txt += u'\n'.join(lines)
+                lines.insert(0, '<h1 class="chapter">' + title + '</h1>\n')
+            txt += '\n'.join(lines)

        self.log.info(u'Converting text to OEB...')
        html = HTML_TEMPLATE % (self.header_record.title, txt)
-        with open(os.path.join(output_dir, u'index.html'), 'wb') as index:
+        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
            index.write(html.encode('utf-8'))

        mi = self.get_metadata()
-        manifest = [(u'index.html', None)]
-        spine = [u'index.html']
-        opf_writer(output_dir, u'metadata.opf', manifest, spine, mi)
+        manifest = [('index.html', None)]
+        spine = ['index.html']
+        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)

-        return os.path.join(output_dir, u'metadata.opf')
+        return os.path.join(output_dir, 'metadata.opf')
--- a/src/calibre/ebooks/pdb/header.py
+++ b/src/calibre/ebooks/pdb/header.py
@ -1,4 +1,7 @@
 # -*- coding: utf-8 -*-
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
 '''
 Read the header data from a pdb file.
 '''
--- a/src/calibre/ebooks/pdb/palmdoc/reader.py
+++ b/src/calibre/ebooks/pdb/palmdoc/reader.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals

 '''
 Read content from palmdoc pdb file.
@ -49,7 +50,7 @@ class Reader(FormatReader):
        if self.header_record.compression == 2 or self.header_record.compression == 258:
            from calibre.ebooks.compression.palmdoc import decompress_doc
            return decompress_doc(self.section_data(number))
-        return ''
+        return b''

    def extract_content(self, output_dir):
        raw_txt = b''
--- a/src/calibre/ebooks/pdb/palmdoc/writer.py
+++ b/src/calibre/ebooks/pdb/palmdoc/writer.py
@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from __future__ import division
+from __future__ import absolute_import, division, print_function, unicode_literals

 '''
 Writer content to palmdoc pdb file.
--- a/src/calibre/ebooks/pdb/pdf/reader.py
+++ b/src/calibre/ebooks/pdb/pdf/reader.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals

 '''
 Read content from palmdoc pdb file.
--- a/src/calibre/ebooks/pdb/ztxt/init.py
+++ b/src/calibre/ebooks/pdb/ztxt/init.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals

 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -7,4 +8,3 @@ __docformat__ = 'restructuredtext en'

 class zTXTError(Exception):
    pass
-
--- a/src/calibre/ebooks/pdb/ztxt/reader.py
+++ b/src/calibre/ebooks/pdb/ztxt/reader.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals

 '''
 Read content from ztxt pdb file.
--- a/src/calibre/ebooks/pdb/ztxt/writer.py
+++ b/src/calibre/ebooks/pdb/ztxt/writer.py
@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals

 '''
 Writer content to ztxt pdb file.
@ -67,7 +68,7 @@ class Writer(FormatWriter):
        return txt_records, txt_length

    def _header_record(self, txt_length, record_count, crc32):
-        record = ''
+        record = b''

        record += struct.pack('>H', 0x012c)             # [0:2], version. 0x012c = 1.44
        record += struct.pack('>H', record_count)       # [2:4], Number of PDB records used for the text of the book.
@ -83,4 +84,3 @@ class Writer(FormatWriter):
        record += struct.pack('>LL', 0, 0)              # [24:32], padding

        return record
-