mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
py3: Port basic AZW3 output
This commit is contained in:
parent
2a2a08b660
commit
a07ad9633f
@ -21,6 +21,24 @@ IMAGE_MAX_SIZE = 10 * 1024 * 1024
|
||||
RECORD_SIZE = 0x1000 # 4096 (Text record size (uncompressed))
|
||||
|
||||
|
||||
class PolyglotDict(dict):
    """A dict that stores text keys as UTF-8 encoded bytes.

    Any key that is an instance of ``unicode_type`` is encoded with UTF-8
    before it is stored or looked up, so the text and bytes spellings of the
    same key always address the same entry. Keys of any other type are used
    unchanged.

    The normalization is applied uniformly to item access, membership tests,
    deletion, ``get``, ``pop``, ``setdefault``, ``update`` and the
    constructor. The built-in dict versions of those methods do not go
    through ``__getitem__``/``__setitem__``, so they must be overridden
    explicitly to stay consistent.
    """

    def __init__(self, *args, **kwargs):
        # Route initial contents through update() so they are normalized too.
        dict.__init__(self)
        self.update(*args, **kwargs)

    @staticmethod
    def _normalize_key(key):
        """Return *key* encoded as UTF-8 if it is text, otherwise unchanged."""
        if isinstance(key, unicode_type):
            key = key.encode('utf-8')
        return key

    def __setitem__(self, key, val):
        dict.__setitem__(self, self._normalize_key(key), val)

    def __getitem__(self, key):
        return dict.__getitem__(self, self._normalize_key(key))

    def __contains__(self, key):
        return dict.__contains__(self, self._normalize_key(key))

    def __delitem__(self, key):
        dict.__delitem__(self, self._normalize_key(key))

    def get(self, key, default=None):
        """Return the value for *key* (normalized), or *default* if absent."""
        return dict.get(self, self._normalize_key(key), default)

    def pop(self, key, *default):
        """Remove *key* (normalized) and return its value.

        Raises KeyError if the key is absent and no default was given.
        """
        return dict.pop(self, self._normalize_key(key), *default)

    def setdefault(self, key, default=None):
        """Return the value for *key* (normalized), inserting *default* if absent."""
        return dict.setdefault(self, self._normalize_key(key), default)

    def update(self, *args, **kwargs):
        """Merge the given mapping/pairs/keywords, normalizing every key."""
        # dict() accepts exactly the argument shapes dict.update() does and
        # raises the same TypeError for invalid ones.
        for key, val in dict(*args, **kwargs).items():
            self[key] = val
|
||||
|
||||
|
||||
def decode_string(raw, codec='utf-8', ordt_map=None):
|
||||
length, = struct.unpack(b'>B', raw[0:1])
|
||||
raw = raw[1:1+length]
|
||||
|
@ -145,7 +145,7 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
|
||||
if datestr is None:
|
||||
raise ValueError("missing date or timestamp")
|
||||
|
||||
datestr = bytes(datestr)
|
||||
datestr = datestr.encode('utf-8')
|
||||
exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
|
||||
exth.write(datestr)
|
||||
nrecs += 1
|
||||
@ -179,7 +179,7 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
|
||||
if thumbnail_offset is not None:
|
||||
exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
|
||||
thumbnail_offset))
|
||||
thumbnail_uri_str = bytes('kindle:embed:%s' %(to_base(thumbnail_offset, base=32, min_num_digits=4)))
|
||||
thumbnail_uri_str = ('kindle:embed:%s' %(to_base(thumbnail_offset, base=32, min_num_digits=4))).encode('utf-8')
|
||||
exth.write(pack(b'>II', EXTH_CODES['kf8_thumbnail_uri'], len(thumbnail_uri_str) + 8))
|
||||
exth.write(thumbnail_uri_str)
|
||||
nrecs += 2
|
||||
@ -217,7 +217,7 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
|
||||
nrecs += 1
|
||||
|
||||
if page_progression_direction in {'rtl', 'ltr', 'default'}:
|
||||
ppd = bytes(page_progression_direction)
|
||||
ppd = page_progression_direction.encode('ascii')
|
||||
exth.write(pack(b'>II', EXTH_CODES['page_progression_direction'], len(ppd) + 8))
|
||||
exth.write(ppd)
|
||||
nrecs += 1
|
||||
|
@ -15,7 +15,7 @@ from xml.sax.saxutils import escape
|
||||
from lxml import etree
|
||||
|
||||
from calibre.ebooks.oeb.base import XHTML_NS, extract
|
||||
from calibre.ebooks.mobi.utils import to_base
|
||||
from calibre.ebooks.mobi.utils import to_base, PolyglotDict
|
||||
from polyglot.builtins import iteritems, unicode_type, codepoint_to_chr as mychr
|
||||
|
||||
CHUNK_SIZE = 8192
|
||||
@ -34,7 +34,7 @@ aid_able_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b',
|
||||
'video'}
|
||||
|
||||
_self_closing_pat = re.compile(
|
||||
r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'%('|'.join(aid_able_tags|{'script',
|
||||
br'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'%('|'.join(aid_able_tags|{'script',
|
||||
'style', 'title', 'head'})).encode('ascii'),
|
||||
re.IGNORECASE)
|
||||
|
||||
@ -116,7 +116,7 @@ class Skeleton(object):
|
||||
|
||||
def render(self, root):
    """Serialize *root* and return the result with the XHTML namespace
    declared on the opening ``<html`` tag and self-closing tags expanded.

    The tree is serialized with ``tostring(root, xml_declaration=True)``;
    the first occurrence of ``<html`` in that output is rewritten to carry
    ``xmlns="<XHTML_NS>"``, and the result is passed through
    ``close_self_closing_tags`` before being returned.
    """
    raw = tostring(root, xml_declaration=True)
    # Encode the replacement explicitly: bytes(text) without an encoding
    # raises TypeError on Python 3, and the replace below operates on bytes.
    html_open = ('<html xmlns="%s"' % XHTML_NS).encode('ascii')
    # Replace exactly once so the xmlns attribute is inserted a single time.
    raw = raw.replace(b'<html', html_open, 1)
    return close_self_closing_tags(raw)
|
||||
|
||||
@ -180,7 +180,7 @@ class Chunker(object):
|
||||
with_tail=True))
|
||||
orig_dumps[-1] = close_self_closing_tags(
|
||||
orig_dumps[-1].replace(b'<html',
|
||||
bytes('<html xmlns="%s"'%XHTML_NS), 1))
|
||||
('<html xmlns="%s"'%XHTML_NS).encode('ascii'), 1))
|
||||
|
||||
# First pass: break up document into rendered strings of length no
|
||||
# more than CHUNK_SIZE
|
||||
@ -366,7 +366,7 @@ class Chunker(object):
|
||||
# The first number is an index into the chunk table and the second is
|
||||
# an offset from the start of the chunk to the start of the tag pointed
|
||||
# to by the link.
|
||||
aid_map = {} # Map of aid to (fid, offset_from_start_of_chunk, offset_from_start_of_text)
|
||||
aid_map = PolyglotDict() # Map of aid to (fid, offset_from_start_of_chunk, offset_from_start_of_text)
|
||||
for match in re.finditer(br'<[^>]+? [ac]id=[\'"]([cA-Z0-9]+)[\'"]', rebuilt_text):
|
||||
offset = match.start()
|
||||
pos_fid = None
|
||||
@ -395,9 +395,9 @@ class Chunker(object):
|
||||
def to_placeholder(aid):
    """Return the placeholder for *aid* as UTF-8 encoded bytes.

    The first two items of ``aid_map[aid]`` are converted with
    ``to_base(..., min_num_digits=4)`` and ``to_href(...)`` respectively and
    joined with ``':off:'``.

    Raises KeyError if *aid* is not present in ``aid_map``.
    """
    pos, fid, _ = aid_map[aid]
    pos, fid = to_base(pos, min_num_digits=4), to_href(fid)
    # Encode explicitly: bytes(text) without an encoding raises TypeError
    # on Python 3.
    return ':off:'.join((pos, fid)).encode('utf-8')
|
||||
|
||||
placeholder_map = {bytes(k):to_placeholder(v) for k, v in
|
||||
placeholder_map = {k:to_placeholder(v) for k, v in
|
||||
iteritems(self.placeholder_map)}
|
||||
|
||||
# Now update the links
|
||||
|
Loading…
x
Reference in New Issue
Block a user