From 549b822f9c7efee1fbc83d4cc88f8071457c2eca Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Mon, 10 Jun 2019 15:39:55 +0530
Subject: [PATCH] py3: More unicode fixes

---
 bypy/sources.json                           | 48 ++++++---------------
 bypy/windows.conf                           |  5 ++-
 src/calibre/ebooks/pdb/__init__.py          |  1 +
 src/calibre/ebooks/pdb/ereader/reader.py    |  1 +
 src/calibre/ebooks/pdb/ereader/reader132.py |  9 ++--
 src/calibre/ebooks/pdb/formatwriter.py      |  1 +
 src/calibre/ebooks/pdb/haodoo/reader.py     | 33 +++++++-------
 src/calibre/ebooks/pdb/header.py            |  3 ++
 src/calibre/ebooks/pdb/palmdoc/reader.py    |  3 +-
 src/calibre/ebooks/pdb/palmdoc/writer.py    |  2 +-
 src/calibre/ebooks/pdb/pdf/reader.py        |  1 +
 src/calibre/ebooks/pdb/ztxt/__init__.py     |  2 +-
 src/calibre/ebooks/pdb/ztxt/reader.py       |  1 +
 src/calibre/ebooks/pdb/ztxt/writer.py       |  4 +-
 14 files changed, 54 insertions(+), 60 deletions(-)

diff --git a/bypy/sources.json b/bypy/sources.json
index ccf3aee8ec..e8b8026fe7 100644
--- a/bypy/sources.json
+++ b/bypy/sources.json
@@ -1,7 +1,7 @@
 [
     {
         "name": "nasm",
-        "os": "macos",
+        "os": "macos,windows",
         "unix": {
             "filename": "nasm-2.14.02.tar.xz",
             "hash": "sha256:e24ade3e928f7253aa8c14aa44726d1edf3f98643f87c9d72ec1df44b26be8f5",
@@ -153,34 +153,18 @@
             "urls": ["https://www.python.org/ftp/python/2.7.16/{filename}"]
         },
         "windows": {
-            "filename":"python-1948b35e654e0b69ac93e31e3c3405172a6a1b91.tar.gz",
-            "hash":"sha1:1948b35e654e0b69ac93e31e3c3405172a6a1b91",
+            "filename":"python-ca3c62c123b8c81a39dfaa785dfa77a0e40b3604ca4bdaceb65df10e32e29b82.tar.gz",
+            "hash":"sha256:ca3c62c123b8c81a39dfaa785dfa77a0e40b3604ca4bdaceb65df10e32e29b82",
             "urls":["github:kovidgoyal/cpython"]
         }
     },
 
-    {
-        "name": "pywin32",
-        "os": "windows",
-        "python": 2,
-        "windows": {
-            "filename":"pywin32-dd5760063f88a300403c74f3e81f3437b8396d8f.tar.gz",
-            "hash":"sha1:dd5760063f88a300403c74f3e81f3437b8396d8f",
-            "urls":["github:kovidgoyal/pywin32"]
-        }
-    },
-
     {
         "name": "icu",
         "unix": {
             "filename": "icu4c-64_2-src.tgz",
             "hash": "sha256:627d5d8478e6d96fc8c90fed4851239079a561a6a8b9e48b0892f24e82d31d6c",
             "urls": ["http://download.icu-project.org/files/icu4c/64.2/{filename}"]
-        },
-        "windows": {
-            "filename": "icu4c-64_2-src.zip",
-            "hash": "sha1:aaa014177845c16deba888450c0c34e8bd57e736",
-            "urls": ["http://download.icu-project.org/files/icu4c/64.2/{filename}"]
         }
     },
 
@@ -500,6 +484,17 @@
         }
     },
 
+    {
+        "name": "pywin32",
+        "os": "windows",
+        "python": 2,
+        "windows": {
+            "filename":"pywin32-85c57822da3f8922b8d15a3d1eb7d3c954015a908dd2b663001408ef4c52b74a.tar.gz",
+            "hash":"sha256:85c57822da3f8922b8d15a3d1eb7d3c954015a908dd2b663001408ef4c52b74a",
+            "urls":["github:kovidgoyal/pywin32"]
+        }
+    },
+
     {
         "name": "unrardll",
         "unix": {
@@ -767,11 +762,6 @@
             "filename": "sip-4.19.16.tar.gz",
             "hash": "sha256:184c790d58e9527fc6bdac2bbf8638f3d1b41dea922cad8eb83172b4ba70c620",
             "urls": ["https://www.riverbankcomputing.com/static/Downloads/sip/4.19.16/{filename}"]
-        },
-        "windows": {
-            "filename": "sip-4.19.16.zip",
-            "hash": "sha1:2c0844ea0304d11343168ee25a9ff11df1646c76",
-            "urls": ["https://www.riverbankcomputing.com/static/Downloads/sip/4.19.16/{filename}"]
         }
     },
 
@@ -781,11 +771,6 @@
             "filename": "PyQt5_gpl-5.12.1.tar.gz",
             "hash": "sha256:3718ce847d824090fd5f95ff3f13847ee75c2507368d4cbaeb48338f506e59bf",
             "urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQt5/5.12.1/{filename}"]
-        },
-        "windows": {
-            "filename": "PyQt5_gpl-5.12.1.zip",
-            "hash": "sha1:adc7cd647b23c438788f62e1e804ca2ee0ad1529",
-            "urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQt5/5.12.1/{filename}"]
         }
     },
 
@@ -795,11 +780,6 @@
             "filename": "PyQtWebEngine_gpl-5.12.1.tar.gz",
             "hash": "sha256:860704672ea1b616e1347be1f347bc1c749e64ed378370863fe209e84e9bd473",
             "urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQtWebEngine/5.12.1/{filename}"]
-        },
-        "windows": {
-            "filename": "PyQtWebEngine_gpl-5.12.1.zip",
-            "hash": "sha1:adc7cd647b23c438788f62e1e804ca2ee0ad1529",
-            "urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQtWebEngine/5.12.1/{filename}"]
         }
     },
 
diff --git a/bypy/windows.conf b/bypy/windows.conf
index 4c78912984..389e65f864 100644
--- a/bypy/windows.conf
+++ b/bypy/windows.conf
@@ -1,6 +1,9 @@
-# Requires installation of Visual Studio 2017 Community Edition and Python 3.7
+# Requires installation of Visual Studio 2017 Community Edition, Git, Python 3.7 and Perl
+# git.exe must be in PATH
+# Install certifi in python 3 with:
 # python.exe -m pip install certifi
 
 vm_name 'calibre-windows-build'
 root 'C:/r'
 python 'C:/py/python.exe'
+perl 'C:/Strawberry/perl/bin/perl.exe'
diff --git a/src/calibre/ebooks/pdb/__init__.py b/src/calibre/ebooks/pdb/__init__.py
index c950f866a5..599744fe06 100644
--- a/src/calibre/ebooks/pdb/__init__.py
+++ b/src/calibre/ebooks/pdb/__init__.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py
index 65af647a13..bd2bb976f5 100644
--- a/src/calibre/ebooks/pdb/ereader/reader.py
+++ b/src/calibre/ebooks/pdb/ereader/reader.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 '''
 Read content from ereader pdb file.
diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py
index e650da2806..b98c5e8356 100644
--- a/src/calibre/ebooks/pdb/ereader/reader132.py
+++ b/src/calibre/ebooks/pdb/ereader/reader132.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 '''
 Read content from ereader pdb file with a 132 byte header created by Dropbook.
@@ -87,9 +88,9 @@ class Reader132(FormatReader):
 
     def get_image(self, number):
         if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
-            return 'empty', ''
+            return 'empty', b''
         data = self.section_data(number)
-        name = data[4:4 + 32].strip('\x00')
+        name = data[4:4 + 32].strip(b'\x00').decode(self.encoding or 'cp1252')
         img = data[62:]
         return name, img
 
@@ -116,9 +117,9 @@ class Reader132(FormatReader):
         title = self.mi.title
         if not isinstance(title, unicode_type):
             title = title.decode('utf-8', 'replace')
-        html = u'<html><head><title>%s</title></head><body>' % title
+        html = '<html><head><title>%s</title></head><body>' % title
 
-        pml = u''
+        pml = ''
         for i in range(1, self.header_record.num_text_pages + 1):
             self.log.debug('Extracting text page %i' % i)
             pml += self.get_text_page(i)
diff --git a/src/calibre/ebooks/pdb/formatwriter.py b/src/calibre/ebooks/pdb/formatwriter.py
index 556ceace12..11e80c8757 100644
--- a/src/calibre/ebooks/pdb/formatwriter.py
+++ b/src/calibre/ebooks/pdb/formatwriter.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 '''
 Interface defining the necessary public functions for a pdb format writer.
diff --git a/src/calibre/ebooks/pdb/haodoo/reader.py b/src/calibre/ebooks/pdb/haodoo/reader.py
index 1be279c882..dc4497fa13 100644
--- a/src/calibre/ebooks/pdb/haodoo/reader.py
+++ b/src/calibre/ebooks/pdb/haodoo/reader.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 '''
 Read content from Haodoo.net pdb file.
@@ -64,7 +65,7 @@ class LegacyHeaderRecord(object):
         self.title = fix_punct(fields[0].decode('cp950', 'replace'))
         self.num_records = int(fields[1])
         self.chapter_titles = list(map(
-            lambda x: fix_punct(x.decode('cp950', 'replace').rstrip(b'\x00')),
+            lambda x: fix_punct(x.decode('cp950', 'replace').rstrip('\x00')),
             fields[2:]))
 
 
@@ -76,7 +77,7 @@ class UnicodeHeaderRecord(object):
         self.title = fix_punct(fields[0].decode('utf_16_le', 'ignore'))
         self.num_records = int(fields[1])
         self.chapter_titles = list(map(
-            lambda x: fix_punct(x.decode('utf_16_le', 'replace').rstrip(b'\x00')),
+            lambda x: fix_punct(x.decode('utf_16_le', 'replace').rstrip('\x00')),
             fields[2].split(b'\r\x00\n\x00')))
 
 
@@ -99,18 +100,18 @@ class Reader(FormatReader):
 
     def author(self):
         self.stream.seek(35)
-        version = struct.unpack(b'>b', self.stream.read(1))[0]
+        version = struct.unpack('>b', self.stream.read(1))[0]
         if version == 2:
             self.stream.seek(0)
             author = self.stream.read(35).rstrip(b'\x00').decode(self.encoding, 'replace')
             return author
         else:
-            return u'Unknown'
+            return 'Unknown'
 
     def get_metadata(self):
         mi = MetaInformation(self.header_record.title,
                              [self.author()])
-        mi.language = u'zh-tw'
+        mi.language = 'zh-tw'
 
         return mi
 
@@ -119,10 +120,10 @@ class Reader(FormatReader):
 
     def decompress_text(self, number):
         return self.section_data(number).decode(self.encoding,
-                'replace').rstrip(b'\x00')
+                'replace').rstrip('\x00')
 
     def extract_content(self, output_dir):
-        txt = u''
+        txt = ''
 
         self.log.info(u'Decompressing text...')
         for i in range(1, self.header_record.num_records + 1):
@@ -134,23 +135,23 @@ class Reader(FormatReader):
                 line = fix_punct(line)
                 line = line.strip()
                 if not title_added and title in line:
-                    line = u'<h1 class="chapter">' + line + u'</h1>\n'
+                    line = '<h1 class="chapter">' + line + '</h1>\n'
                     title_added = True
                 else:
                     line = prepare_string_for_xml(line)
-                lines.append(u'<p>%s</p>' % line)
+                lines.append('<p>%s</p>' % line)
             if not title_added:
-                lines.insert(0, u'<h1 class="chapter">' + title + u'</h1>\n')
-            txt += u'\n'.join(lines)
+                lines.insert(0, '<h1 class="chapter">' + title + '</h1>\n')
+            txt += '\n'.join(lines)
 
         self.log.info(u'Converting text to OEB...')
         html = HTML_TEMPLATE % (self.header_record.title, txt)
-        with open(os.path.join(output_dir, u'index.html'), 'wb') as index:
+        with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
             index.write(html.encode('utf-8'))
 
         mi = self.get_metadata()
-        manifest = [(u'index.html', None)]
-        spine = [u'index.html']
-        opf_writer(output_dir, u'metadata.opf', manifest, spine, mi)
+        manifest = [('index.html', None)]
+        spine = ['index.html']
+        opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)
 
-        return os.path.join(output_dir, u'metadata.opf')
+        return os.path.join(output_dir, 'metadata.opf')
diff --git a/src/calibre/ebooks/pdb/header.py b/src/calibre/ebooks/pdb/header.py
index 80905cdb1f..afe7fda08a 100644
--- a/src/calibre/ebooks/pdb/header.py
+++ b/src/calibre/ebooks/pdb/header.py
@@ -1,4 +1,7 @@
 # -*- coding: utf-8 -*-
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
 '''
 Read the header data from a pdb file.
 '''
diff --git a/src/calibre/ebooks/pdb/palmdoc/reader.py b/src/calibre/ebooks/pdb/palmdoc/reader.py
index 6c41588246..8f84dc7782 100644
--- a/src/calibre/ebooks/pdb/palmdoc/reader.py
+++ b/src/calibre/ebooks/pdb/palmdoc/reader.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 '''
 Read content from palmdoc pdb file.
@@ -49,7 +50,7 @@ class Reader(FormatReader):
         if self.header_record.compression == 2 or self.header_record.compression == 258:
             from calibre.ebooks.compression.palmdoc import decompress_doc
             return decompress_doc(self.section_data(number))
-        return ''
+        return b''
 
     def extract_content(self, output_dir):
         raw_txt = b''
diff --git a/src/calibre/ebooks/pdb/palmdoc/writer.py b/src/calibre/ebooks/pdb/palmdoc/writer.py
index 13d69b451f..272e991b41 100644
--- a/src/calibre/ebooks/pdb/palmdoc/writer.py
+++ b/src/calibre/ebooks/pdb/palmdoc/writer.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-from __future__ import division
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 '''
 Writer content to palmdoc pdb file.
diff --git a/src/calibre/ebooks/pdb/pdf/reader.py b/src/calibre/ebooks/pdb/pdf/reader.py
index 4f166397c4..df0af2cfbd 100644
--- a/src/calibre/ebooks/pdb/pdf/reader.py
+++ b/src/calibre/ebooks/pdb/pdf/reader.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 '''
 Read content from palmdoc pdb file.
diff --git a/src/calibre/ebooks/pdb/ztxt/__init__.py b/src/calibre/ebooks/pdb/ztxt/__init__.py
index 4dd1a954b0..18d4da97be 100644
--- a/src/calibre/ebooks/pdb/ztxt/__init__.py
+++ b/src/calibre/ebooks/pdb/ztxt/__init__.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 __license__   = 'GPL v3'
 __copyright__ = '2009, John Schember <john@nachtimwald.com>'
@@ -7,4 +8,3 @@ __docformat__ = 'restructuredtext en'
 
 class zTXTError(Exception):
     pass
-
diff --git a/src/calibre/ebooks/pdb/ztxt/reader.py b/src/calibre/ebooks/pdb/ztxt/reader.py
index 8bfda3f59a..a1c36cf808 100644
--- a/src/calibre/ebooks/pdb/ztxt/reader.py
+++ b/src/calibre/ebooks/pdb/ztxt/reader.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 '''
 Read content from ztxt pdb file.
diff --git a/src/calibre/ebooks/pdb/ztxt/writer.py b/src/calibre/ebooks/pdb/ztxt/writer.py
index 5545349545..05805c4253 100644
--- a/src/calibre/ebooks/pdb/ztxt/writer.py
+++ b/src/calibre/ebooks/pdb/ztxt/writer.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 '''
 Writer content to ztxt pdb file.
@@ -67,7 +68,7 @@ class Writer(FormatWriter):
         return txt_records, txt_length
 
     def _header_record(self, txt_length, record_count, crc32):
-        record = ''
+        record = b''
 
         record += struct.pack('>H', 0x012c)             # [0:2], version. 0x012c = 1.44
         record += struct.pack('>H', record_count)       # [2:4], Number of PDB records used for the text of the book.
@@ -83,4 +84,3 @@ class Writer(FormatWriter):
         record += struct.pack('>LL', 0, 0)              # [24:32], padding
 
         return record
-