py3: More unicode fixes

This commit is contained in:
Kovid Goyal 2019-06-10 15:39:55 +05:30
parent 2ecd6dc1b1
commit 549b822f9c
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
14 changed files with 54 additions and 60 deletions

View File

@ -1,7 +1,7 @@
[
{
"name": "nasm",
"os": "macos",
"os": "macos,windows",
"unix": {
"filename": "nasm-2.14.02.tar.xz",
"hash": "sha256:e24ade3e928f7253aa8c14aa44726d1edf3f98643f87c9d72ec1df44b26be8f5",
@ -153,34 +153,18 @@
"urls": ["https://www.python.org/ftp/python/2.7.16/{filename}"]
},
"windows": {
"filename":"python-1948b35e654e0b69ac93e31e3c3405172a6a1b91.tar.gz",
"hash":"sha1:1948b35e654e0b69ac93e31e3c3405172a6a1b91",
"filename":"python-ca3c62c123b8c81a39dfaa785dfa77a0e40b3604ca4bdaceb65df10e32e29b82.tar.gz",
"hash":"sha256:ca3c62c123b8c81a39dfaa785dfa77a0e40b3604ca4bdaceb65df10e32e29b82",
"urls":["github:kovidgoyal/cpython"]
}
},
{
"name": "pywin32",
"os": "windows",
"python": 2,
"windows": {
"filename":"pywin32-dd5760063f88a300403c74f3e81f3437b8396d8f.tar.gz",
"hash":"sha1:dd5760063f88a300403c74f3e81f3437b8396d8f",
"urls":["github:kovidgoyal/pywin32"]
}
},
{
"name": "icu",
"unix": {
"filename": "icu4c-64_2-src.tgz",
"hash": "sha256:627d5d8478e6d96fc8c90fed4851239079a561a6a8b9e48b0892f24e82d31d6c",
"urls": ["http://download.icu-project.org/files/icu4c/64.2/{filename}"]
},
"windows": {
"filename": "icu4c-64_2-src.zip",
"hash": "sha1:aaa014177845c16deba888450c0c34e8bd57e736",
"urls": ["http://download.icu-project.org/files/icu4c/64.2/{filename}"]
}
},
@ -500,6 +484,17 @@
}
},
{
"name": "pywin32",
"os": "windows",
"python": 2,
"windows": {
"filename":"pywin32-85c57822da3f8922b8d15a3d1eb7d3c954015a908dd2b663001408ef4c52b74a.tar.gz",
"hash":"sha256:85c57822da3f8922b8d15a3d1eb7d3c954015a908dd2b663001408ef4c52b74a",
"urls":["github:kovidgoyal/pywin32"]
}
},
{
"name": "unrardll",
"unix": {
@ -767,11 +762,6 @@
"filename": "sip-4.19.16.tar.gz",
"hash": "sha256:184c790d58e9527fc6bdac2bbf8638f3d1b41dea922cad8eb83172b4ba70c620",
"urls": ["https://www.riverbankcomputing.com/static/Downloads/sip/4.19.16/{filename}"]
},
"windows": {
"filename": "sip-4.19.16.zip",
"hash": "sha1:2c0844ea0304d11343168ee25a9ff11df1646c76",
"urls": ["https://www.riverbankcomputing.com/static/Downloads/sip/4.19.16/{filename}"]
}
},
@ -781,11 +771,6 @@
"filename": "PyQt5_gpl-5.12.1.tar.gz",
"hash": "sha256:3718ce847d824090fd5f95ff3f13847ee75c2507368d4cbaeb48338f506e59bf",
"urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQt5/5.12.1/{filename}"]
},
"windows": {
"filename": "PyQt5_gpl-5.12.1.zip",
"hash": "sha1:adc7cd647b23c438788f62e1e804ca2ee0ad1529",
"urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQt5/5.12.1/{filename}"]
}
},
@ -795,11 +780,6 @@
"filename": "PyQtWebEngine_gpl-5.12.1.tar.gz",
"hash": "sha256:860704672ea1b616e1347be1f347bc1c749e64ed378370863fe209e84e9bd473",
"urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQtWebEngine/5.12.1/{filename}"]
},
"windows": {
"filename": "PyQtWebEngine_gpl-5.12.1.zip",
"hash": "sha1:adc7cd647b23c438788f62e1e804ca2ee0ad1529",
"urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQtWebEngine/5.12.1/{filename}"]
}
},

View File

@ -1,6 +1,9 @@
# Requires installation of Visual Studio 2017 Community Edition and Python 3.7
# Requires installation of Visual Studio 2017 Community Edition, Git, Python 3.7 and Perl
# git.exe must be in PATH
# Install certifi in python 3 with:
# python.exe -m pip install certifi
vm_name 'calibre-windows-build'
root 'C:/r'
python 'C:/py/python.exe'
perl 'C:/Strawberry/perl/bin/perl.exe'

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from ereader pdb file.

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from ereader pdb file with a 132 byte header created by Dropbook.
@ -87,9 +88,9 @@ class Reader132(FormatReader):
def get_image(self, number):
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
return 'empty', ''
return 'empty', b''
data = self.section_data(number)
name = data[4:4 + 32].strip('\x00')
name = data[4:4 + 32].strip(b'\x00').decode(self.encoding or 'cp1252')
img = data[62:]
return name, img
@ -116,9 +117,9 @@ class Reader132(FormatReader):
title = self.mi.title
if not isinstance(title, unicode_type):
title = title.decode('utf-8', 'replace')
html = u'<html><head><title>%s</title></head><body>' % title
html = '<html><head><title>%s</title></head><body>' % title
pml = u''
pml = ''
for i in range(1, self.header_record.num_text_pages + 1):
self.log.debug('Extracting text page %i' % i)
pml += self.get_text_page(i)

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Interface defining the necessary public functions for a pdb format writer.

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from Haodoo.net pdb file.
@ -64,7 +65,7 @@ class LegacyHeaderRecord(object):
self.title = fix_punct(fields[0].decode('cp950', 'replace'))
self.num_records = int(fields[1])
self.chapter_titles = list(map(
lambda x: fix_punct(x.decode('cp950', 'replace').rstrip(b'\x00')),
lambda x: fix_punct(x.decode('cp950', 'replace').rstrip('\x00')),
fields[2:]))
@ -76,7 +77,7 @@ class UnicodeHeaderRecord(object):
self.title = fix_punct(fields[0].decode('utf_16_le', 'ignore'))
self.num_records = int(fields[1])
self.chapter_titles = list(map(
lambda x: fix_punct(x.decode('utf_16_le', 'replace').rstrip(b'\x00')),
lambda x: fix_punct(x.decode('utf_16_le', 'replace').rstrip('\x00')),
fields[2].split(b'\r\x00\n\x00')))
@ -99,18 +100,18 @@ class Reader(FormatReader):
def author(self):
self.stream.seek(35)
version = struct.unpack(b'>b', self.stream.read(1))[0]
version = struct.unpack('>b', self.stream.read(1))[0]
if version == 2:
self.stream.seek(0)
author = self.stream.read(35).rstrip(b'\x00').decode(self.encoding, 'replace')
return author
else:
return u'Unknown'
return 'Unknown'
def get_metadata(self):
mi = MetaInformation(self.header_record.title,
[self.author()])
mi.language = u'zh-tw'
mi.language = 'zh-tw'
return mi
@ -119,10 +120,10 @@ class Reader(FormatReader):
def decompress_text(self, number):
return self.section_data(number).decode(self.encoding,
'replace').rstrip(b'\x00')
'replace').rstrip('\x00')
def extract_content(self, output_dir):
txt = u''
txt = ''
self.log.info(u'Decompressing text...')
for i in range(1, self.header_record.num_records + 1):
@ -134,23 +135,23 @@ class Reader(FormatReader):
line = fix_punct(line)
line = line.strip()
if not title_added and title in line:
line = u'<h1 class="chapter">' + line + u'</h1>\n'
line = '<h1 class="chapter">' + line + '</h1>\n'
title_added = True
else:
line = prepare_string_for_xml(line)
lines.append(u'<p>%s</p>' % line)
lines.append('<p>%s</p>' % line)
if not title_added:
lines.insert(0, u'<h1 class="chapter">' + title + u'</h1>\n')
txt += u'\n'.join(lines)
lines.insert(0, '<h1 class="chapter">' + title + '</h1>\n')
txt += '\n'.join(lines)
self.log.info(u'Converting text to OEB...')
html = HTML_TEMPLATE % (self.header_record.title, txt)
with open(os.path.join(output_dir, u'index.html'), 'wb') as index:
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
index.write(html.encode('utf-8'))
mi = self.get_metadata()
manifest = [(u'index.html', None)]
spine = [u'index.html']
opf_writer(output_dir, u'metadata.opf', manifest, spine, mi)
manifest = [('index.html', None)]
spine = ['index.html']
opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)
return os.path.join(output_dir, u'metadata.opf')
return os.path.join(output_dir, 'metadata.opf')

View File

@ -1,4 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read the header data from a pdb file.
'''

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from palmdoc pdb file.
@ -49,7 +50,7 @@ class Reader(FormatReader):
if self.header_record.compression == 2 or self.header_record.compression == 258:
from calibre.ebooks.compression.palmdoc import decompress_doc
return decompress_doc(self.section_data(number))
return ''
return b''
def extract_content(self, output_dir):
raw_txt = b''

View File

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import division
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Write content to palmdoc pdb file.

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from palmdoc pdb file.

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
@ -7,4 +8,3 @@ __docformat__ = 'restructuredtext en'
class zTXTError(Exception):
pass

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Read content from ztxt pdb file.

View File

@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
'''
Write content to ztxt pdb file.
@ -67,7 +68,7 @@ class Writer(FormatWriter):
return txt_records, txt_length
def _header_record(self, txt_length, record_count, crc32):
record = ''
record = b''
record += struct.pack('>H', 0x012c) # [0:2], version. 0x012c = 1.44
record += struct.pack('>H', record_count) # [2:4], Number of PDB records used for the text of the book.
@ -83,4 +84,3 @@ class Writer(FormatWriter):
record += struct.pack('>LL', 0, 0) # [24:32], padding
return record