mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: More unicode fixes
This commit is contained in:
parent
2ecd6dc1b1
commit
549b822f9c
@ -1,7 +1,7 @@
|
||||
[
|
||||
{
|
||||
"name": "nasm",
|
||||
"os": "macos",
|
||||
"os": "macos,windows",
|
||||
"unix": {
|
||||
"filename": "nasm-2.14.02.tar.xz",
|
||||
"hash": "sha256:e24ade3e928f7253aa8c14aa44726d1edf3f98643f87c9d72ec1df44b26be8f5",
|
||||
@ -153,34 +153,18 @@
|
||||
"urls": ["https://www.python.org/ftp/python/2.7.16/{filename}"]
|
||||
},
|
||||
"windows": {
|
||||
"filename":"python-1948b35e654e0b69ac93e31e3c3405172a6a1b91.tar.gz",
|
||||
"hash":"sha1:1948b35e654e0b69ac93e31e3c3405172a6a1b91",
|
||||
"filename":"python-ca3c62c123b8c81a39dfaa785dfa77a0e40b3604ca4bdaceb65df10e32e29b82.tar.gz",
|
||||
"hash":"sha256:ca3c62c123b8c81a39dfaa785dfa77a0e40b3604ca4bdaceb65df10e32e29b82",
|
||||
"urls":["github:kovidgoyal/cpython"]
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"name": "pywin32",
|
||||
"os": "windows",
|
||||
"python": 2,
|
||||
"windows": {
|
||||
"filename":"pywin32-dd5760063f88a300403c74f3e81f3437b8396d8f.tar.gz",
|
||||
"hash":"sha1:dd5760063f88a300403c74f3e81f3437b8396d8f",
|
||||
"urls":["github:kovidgoyal/pywin32"]
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"name": "icu",
|
||||
"unix": {
|
||||
"filename": "icu4c-64_2-src.tgz",
|
||||
"hash": "sha256:627d5d8478e6d96fc8c90fed4851239079a561a6a8b9e48b0892f24e82d31d6c",
|
||||
"urls": ["http://download.icu-project.org/files/icu4c/64.2/{filename}"]
|
||||
},
|
||||
"windows": {
|
||||
"filename": "icu4c-64_2-src.zip",
|
||||
"hash": "sha1:aaa014177845c16deba888450c0c34e8bd57e736",
|
||||
"urls": ["http://download.icu-project.org/files/icu4c/64.2/{filename}"]
|
||||
}
|
||||
},
|
||||
|
||||
@ -500,6 +484,17 @@
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"name": "pywin32",
|
||||
"os": "windows",
|
||||
"python": 2,
|
||||
"windows": {
|
||||
"filename":"pywin32-85c57822da3f8922b8d15a3d1eb7d3c954015a908dd2b663001408ef4c52b74a.tar.gz",
|
||||
"hash":"sha256:85c57822da3f8922b8d15a3d1eb7d3c954015a908dd2b663001408ef4c52b74a",
|
||||
"urls":["github:kovidgoyal/pywin32"]
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"name": "unrardll",
|
||||
"unix": {
|
||||
@ -767,11 +762,6 @@
|
||||
"filename": "sip-4.19.16.tar.gz",
|
||||
"hash": "sha256:184c790d58e9527fc6bdac2bbf8638f3d1b41dea922cad8eb83172b4ba70c620",
|
||||
"urls": ["https://www.riverbankcomputing.com/static/Downloads/sip/4.19.16/{filename}"]
|
||||
},
|
||||
"windows": {
|
||||
"filename": "sip-4.19.16.zip",
|
||||
"hash": "sha1:2c0844ea0304d11343168ee25a9ff11df1646c76",
|
||||
"urls": ["https://www.riverbankcomputing.com/static/Downloads/sip/4.19.16/{filename}"]
|
||||
}
|
||||
},
|
||||
|
||||
@ -781,11 +771,6 @@
|
||||
"filename": "PyQt5_gpl-5.12.1.tar.gz",
|
||||
"hash": "sha256:3718ce847d824090fd5f95ff3f13847ee75c2507368d4cbaeb48338f506e59bf",
|
||||
"urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQt5/5.12.1/{filename}"]
|
||||
},
|
||||
"windows": {
|
||||
"filename": "PyQt5_gpl-5.12.1.zip",
|
||||
"hash": "sha1:adc7cd647b23c438788f62e1e804ca2ee0ad1529",
|
||||
"urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQt5/5.12.1/{filename}"]
|
||||
}
|
||||
},
|
||||
|
||||
@ -795,11 +780,6 @@
|
||||
"filename": "PyQtWebEngine_gpl-5.12.1.tar.gz",
|
||||
"hash": "sha256:860704672ea1b616e1347be1f347bc1c749e64ed378370863fe209e84e9bd473",
|
||||
"urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQtWebEngine/5.12.1/{filename}"]
|
||||
},
|
||||
"windows": {
|
||||
"filename": "PyQtWebEngine_gpl-5.12.1.zip",
|
||||
"hash": "sha1:adc7cd647b23c438788f62e1e804ca2ee0ad1529",
|
||||
"urls": ["https://www.riverbankcomputing.com/static/Downloads/PyQtWebEngine/5.12.1/{filename}"]
|
||||
}
|
||||
},
|
||||
|
||||
|
@ -1,6 +1,9 @@
|
||||
# Requires installation of Visual Studio 2017 Community Edition and Python 3.7
|
||||
# Requires installation of Visual Studio 2017 Community Edition, Git, Python 3.7 and Perl
|
||||
# git.exe must be in PATH
|
||||
# Install certifi in Python 3 with:
|
||||
# python.exe -m pip install certifi
|
||||
|
||||
vm_name 'calibre-windows-build'
|
||||
root 'C:/r'
|
||||
python 'C:/py/python.exe'
|
||||
perl 'C:/Strawberry/perl/bin/perl.exe'
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Read content from ereader pdb file.
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Read content from ereader pdb file with a 132 byte header created by Dropbook.
|
||||
@ -87,9 +88,9 @@ class Reader132(FormatReader):
|
||||
|
||||
def get_image(self, number):
|
||||
if number < self.header_record.image_data_offset or number > self.header_record.image_data_offset + self.header_record.num_image_pages - 1:
|
||||
return 'empty', ''
|
||||
return 'empty', b''
|
||||
data = self.section_data(number)
|
||||
name = data[4:4 + 32].strip('\x00')
|
||||
name = data[4:4 + 32].strip(b'\x00').decode(self.encoding or 'cp1252')
|
||||
img = data[62:]
|
||||
return name, img
|
||||
|
||||
@ -116,9 +117,9 @@ class Reader132(FormatReader):
|
||||
title = self.mi.title
|
||||
if not isinstance(title, unicode_type):
|
||||
title = title.decode('utf-8', 'replace')
|
||||
html = u'<html><head><title>%s</title></head><body>' % title
|
||||
html = '<html><head><title>%s</title></head><body>' % title
|
||||
|
||||
pml = u''
|
||||
pml = ''
|
||||
for i in range(1, self.header_record.num_text_pages + 1):
|
||||
self.log.debug('Extracting text page %i' % i)
|
||||
pml += self.get_text_page(i)
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Interface defining the necessary public functions for a pdb format writer.
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Read content from Haodoo.net pdb file.
|
||||
@ -64,7 +65,7 @@ class LegacyHeaderRecord(object):
|
||||
self.title = fix_punct(fields[0].decode('cp950', 'replace'))
|
||||
self.num_records = int(fields[1])
|
||||
self.chapter_titles = list(map(
|
||||
lambda x: fix_punct(x.decode('cp950', 'replace').rstrip(b'\x00')),
|
||||
lambda x: fix_punct(x.decode('cp950', 'replace').rstrip('\x00')),
|
||||
fields[2:]))
|
||||
|
||||
|
||||
@ -76,7 +77,7 @@ class UnicodeHeaderRecord(object):
|
||||
self.title = fix_punct(fields[0].decode('utf_16_le', 'ignore'))
|
||||
self.num_records = int(fields[1])
|
||||
self.chapter_titles = list(map(
|
||||
lambda x: fix_punct(x.decode('utf_16_le', 'replace').rstrip(b'\x00')),
|
||||
lambda x: fix_punct(x.decode('utf_16_le', 'replace').rstrip('\x00')),
|
||||
fields[2].split(b'\r\x00\n\x00')))
|
||||
|
||||
|
||||
@ -99,18 +100,18 @@ class Reader(FormatReader):
|
||||
|
||||
def author(self):
|
||||
self.stream.seek(35)
|
||||
version = struct.unpack(b'>b', self.stream.read(1))[0]
|
||||
version = struct.unpack('>b', self.stream.read(1))[0]
|
||||
if version == 2:
|
||||
self.stream.seek(0)
|
||||
author = self.stream.read(35).rstrip(b'\x00').decode(self.encoding, 'replace')
|
||||
return author
|
||||
else:
|
||||
return u'Unknown'
|
||||
return 'Unknown'
|
||||
|
||||
def get_metadata(self):
|
||||
mi = MetaInformation(self.header_record.title,
|
||||
[self.author()])
|
||||
mi.language = u'zh-tw'
|
||||
mi.language = 'zh-tw'
|
||||
|
||||
return mi
|
||||
|
||||
@ -119,10 +120,10 @@ class Reader(FormatReader):
|
||||
|
||||
def decompress_text(self, number):
|
||||
return self.section_data(number).decode(self.encoding,
|
||||
'replace').rstrip(b'\x00')
|
||||
'replace').rstrip('\x00')
|
||||
|
||||
def extract_content(self, output_dir):
|
||||
txt = u''
|
||||
txt = ''
|
||||
|
||||
self.log.info(u'Decompressing text...')
|
||||
for i in range(1, self.header_record.num_records + 1):
|
||||
@ -134,23 +135,23 @@ class Reader(FormatReader):
|
||||
line = fix_punct(line)
|
||||
line = line.strip()
|
||||
if not title_added and title in line:
|
||||
line = u'<h1 class="chapter">' + line + u'</h1>\n'
|
||||
line = '<h1 class="chapter">' + line + '</h1>\n'
|
||||
title_added = True
|
||||
else:
|
||||
line = prepare_string_for_xml(line)
|
||||
lines.append(u'<p>%s</p>' % line)
|
||||
lines.append('<p>%s</p>' % line)
|
||||
if not title_added:
|
||||
lines.insert(0, u'<h1 class="chapter">' + title + u'</h1>\n')
|
||||
txt += u'\n'.join(lines)
|
||||
lines.insert(0, '<h1 class="chapter">' + title + '</h1>\n')
|
||||
txt += '\n'.join(lines)
|
||||
|
||||
self.log.info(u'Converting text to OEB...')
|
||||
html = HTML_TEMPLATE % (self.header_record.title, txt)
|
||||
with open(os.path.join(output_dir, u'index.html'), 'wb') as index:
|
||||
with open(os.path.join(output_dir, 'index.html'), 'wb') as index:
|
||||
index.write(html.encode('utf-8'))
|
||||
|
||||
mi = self.get_metadata()
|
||||
manifest = [(u'index.html', None)]
|
||||
spine = [u'index.html']
|
||||
opf_writer(output_dir, u'metadata.opf', manifest, spine, mi)
|
||||
manifest = [('index.html', None)]
|
||||
spine = ['index.html']
|
||||
opf_writer(output_dir, 'metadata.opf', manifest, spine, mi)
|
||||
|
||||
return os.path.join(output_dir, u'metadata.opf')
|
||||
return os.path.join(output_dir, 'metadata.opf')
|
||||
|
@ -1,4 +1,7 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Read the header data from a pdb file.
|
||||
'''
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Read content from palmdoc pdb file.
|
||||
@ -49,7 +50,7 @@ class Reader(FormatReader):
|
||||
if self.header_record.compression == 2 or self.header_record.compression == 258:
|
||||
from calibre.ebooks.compression.palmdoc import decompress_doc
|
||||
return decompress_doc(self.section_data(number))
|
||||
return ''
|
||||
return b''
|
||||
|
||||
def extract_content(self, output_dir):
|
||||
raw_txt = b''
|
||||
|
@ -1,5 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import division
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Write content to palmdoc pdb file.
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Read content from palmdoc pdb file.
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
@ -7,4 +8,3 @@ __docformat__ = 'restructuredtext en'
|
||||
|
||||
class zTXTError(Exception):
|
||||
pass
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Read content from ztxt pdb file.
|
||||
|
@ -1,4 +1,5 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
|
||||
'''
|
||||
Write content to ztxt pdb file.
|
||||
@ -67,7 +68,7 @@ class Writer(FormatWriter):
|
||||
return txt_records, txt_length
|
||||
|
||||
def _header_record(self, txt_length, record_count, crc32):
|
||||
record = ''
|
||||
record = b''
|
||||
|
||||
record += struct.pack('>H', 0x012c) # [0:2], version. 0x012c = 1.44
|
||||
record += struct.pack('>H', record_count) # [2:4], Number of PDB records used for the text of the book.
|
||||
@ -83,4 +84,3 @@ class Writer(FormatWriter):
|
||||
record += struct.pack('>LL', 0, 0) # [24:32], padding
|
||||
|
||||
return record
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user