Mirror of https://github.com/kovidgoyal/calibre.git

commit a07ad9633f
parent 2a2a08b660

    py3: Port basic AZW3 output
@@ -21,6 +21,24 @@ IMAGE_MAX_SIZE = 10 * 1024 * 1024
 RECORD_SIZE = 0x1000 # 4096 (Text record size (uncompressed))
 
 
+class PolyglotDict(dict):
+
+    def __setitem__(self, key, val):
+        if isinstance(key, unicode_type):
+            key = key.encode('utf-8')
+        dict.__setitem__(self, key, val)
+
+    def __getitem__(self, key):
+        if isinstance(key, unicode_type):
+            key = key.encode('utf-8')
+        return dict.__getitem__(self, key)
+
+    def __contains__(self, key):
+        if isinstance(key, unicode_type):
+            key = key.encode('utf-8')
+        return dict.__contains__(self, key)
+
+
 def decode_string(raw, codec='utf-8', ordt_map=None):
     length, = struct.unpack(b'>B', raw[0:1])
     raw = raw[1:1+length]
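The new PolyglotDict normalizes every unicode key to UTF-8 bytes on the way in, so str and bytes spellings of the same key land in the same slot; this matters once the same aid can arrive as either type during the py3 port. A minimal usage sketch, assuming calibre's source tree is importable (on Python 3, unicode_type is str):

    # str and bytes keys collapse to one entry
    from calibre.ebooks.mobi.utils import PolyglotDict

    d = PolyglotDict()
    d['c0A1'] = (0, 16, 4096)           # str key is stored as b'c0A1'
    assert d[b'c0A1'] == (0, 16, 4096)  # bytes key finds the same entry
    assert 'c0A1' in d and b'c0A1' in d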
@@ -145,7 +145,7 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
     if datestr is None:
         raise ValueError("missing date or timestamp")
 
-    datestr = bytes(datestr)
+    datestr = datestr.encode('utf-8')
     exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
     exth.write(datestr)
     nrecs += 1
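The `bytes(datestr)` call was the core py2-ism behind most hunks in this commit: under Python 2, `bytes` is just `str`, so it silently ASCII-encodes unicode, while under Python 3, `bytes(some_str)` raises TypeError. A standalone illustration of the behavior the change fixes:

    # Python 3 behavior that forced this change:
    datestr = '2012-04-25T10:00:00+00:00'
    try:
        raw = bytes(datestr)  # TypeError: string argument without an encoding
    except TypeError:
        raw = datestr.encode('utf-8')  # explicit, and equivalent on Python 2
    assert raw == b'2012-04-25T10:00:00+00:00'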
@@ -179,7 +179,7 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
     if thumbnail_offset is not None:
         exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12,
             thumbnail_offset))
-        thumbnail_uri_str = bytes('kindle:embed:%s' %(to_base(thumbnail_offset, base=32, min_num_digits=4)))
+        thumbnail_uri_str = ('kindle:embed:%s' %(to_base(thumbnail_offset, base=32, min_num_digits=4))).encode('utf-8')
         exth.write(pack(b'>II', EXTH_CODES['kf8_thumbnail_uri'], len(thumbnail_uri_str) + 8))
         exth.write(thumbnail_uri_str)
         nrecs += 2
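The thumbnail record points at a `kindle:embed:XXXX` URI whose index is the offset rendered as four base-32 digits. For reference, a hedged re-implementation of what `to_base` produces (the real helper lives in calibre.ebooks.mobi.utils; the digit alphabet here is an assumption):

    import string

    # Sketch only: assumes digits 0-9 then A-Z, least significant digit first
    def to_base(num, base=32, min_num_digits=None):
        digits = string.digits + string.ascii_uppercase
        sign = '-' if num < 0 else ''
        if num == 0:
            ans = ['0']
        else:
            num, ans = abs(num), []
            while num:
                ans.append(digits[num % base])
                num //= base
        if min_num_digits is not None and len(ans) < min_num_digits:
            ans.extend('0' * (min_num_digits - len(ans)))
        ans.append(sign)
        return ''.join(reversed(ans))

    # A thumbnail at offset 1 becomes kindle:embed:0001
    assert 'kindle:embed:%s' % to_base(1, base=32, min_num_digits=4) == 'kindle:embed:0001'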
@@ -217,7 +217,7 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
         nrecs += 1
 
     if page_progression_direction in {'rtl', 'ltr', 'default'}:
-        ppd = bytes(page_progression_direction)
+        ppd = page_progression_direction.encode('ascii')
         exth.write(pack(b'>II', EXTH_CODES['page_progression_direction'], len(ppd) + 8))
         exth.write(ppd)
         nrecs += 1
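All three build_exth hunks emit the same on-disk shape: a big-endian `(code, length)` header, where the length counts its own 8 header bytes, followed by the raw payload. A self-contained sketch (the 527 code for page-progression-direction is an assumption about calibre's EXTH_CODES table):

    import struct
    from io import BytesIO

    # EXTH record = >II header (code, total length incl. 8 header bytes) + payload
    def write_exth_record(exth, code, payload):
        exth.write(struct.pack(b'>II', code, len(payload) + 8))
        exth.write(payload)

    exth = BytesIO()
    write_exth_record(exth, 527, 'rtl'.encode('ascii'))
    assert exth.getvalue() == b'\x00\x00\x02\x0f\x00\x00\x00\x0b' + b'rtl'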
@@ -15,7 +15,7 @@ from xml.sax.saxutils import escape
 from lxml import etree
 
 from calibre.ebooks.oeb.base import XHTML_NS, extract
-from calibre.ebooks.mobi.utils import to_base
+from calibre.ebooks.mobi.utils import to_base, PolyglotDict
 from polyglot.builtins import iteritems, unicode_type, codepoint_to_chr as mychr
 
 CHUNK_SIZE = 8192
@@ -34,7 +34,7 @@ aid_able_tags = {'a', 'abbr', 'address', 'article', 'aside', 'audio', 'b',
         'video'}
 
 _self_closing_pat = re.compile(
-    r'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'%('|'.join(aid_able_tags|{'script',
+    br'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>'%('|'.join(aid_able_tags|{'script',
         'style', 'title', 'head'})).encode('ascii'),
     re.IGNORECASE)
 
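The `r''` to `br''` change is required because Python 3 refuses to match a str pattern against bytes input, and the chunker works on serialized markup as bytes throughout. A reduced sketch of the same construction, with a stand-in subset of aid_able_tags:

    import re

    # The pattern must be bytes to match the (bytes) serialized markup
    tags = {'p', 'div', 'span'}  # stand-in subset of aid_able_tags
    pat = re.compile(
        br'<(?P<tag>%s)(?=[\s/])(?P<arg>[^>]*)/>' % '|'.join(sorted(tags)).encode('ascii'),
        re.IGNORECASE)

    m = pat.search(b'<body><div class="x"/></body>')
    assert m.group('tag') == b'div'
    # Mixing str pattern and bytes haystack raises TypeError on Python 3:
    # re.search(r'<div', b'<div/>')  -> TypeError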
@@ -116,7 +116,7 @@ class Skeleton(object):
 
     def render(self, root):
         raw = tostring(root, xml_declaration=True)
-        raw = raw.replace(b'<html', bytes('<html xmlns="%s"'%XHTML_NS), 1)
+        raw = raw.replace(b'<html', ('<html xmlns="%s"'%XHTML_NS).encode('ascii'), 1)
         raw = close_self_closing_tags(raw)
         return raw
 
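Both this hunk and the Chunker hunk below inject the XHTML namespace into already-serialized bytes; on Python 3 the `%`-formatting has to happen on the str side before encoding. A reduced sketch of the same operation:

    from lxml import etree

    XHTML_NS = 'http://www.w3.org/1999/xhtml'  # value from calibre.ebooks.oeb.base

    root = etree.fromstring('<html><body><p>x</p></body></html>')
    raw = etree.tostring(root, xml_declaration=True)
    # Format as str first, then encode, then splice into the bytes document
    raw = raw.replace(b'<html', ('<html xmlns="%s"' % XHTML_NS).encode('ascii'), 1)
    assert b'xmlns="http://www.w3.org/1999/xhtml"' in raw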
@@ -180,7 +180,7 @@ class Chunker(object):
                 with_tail=True))
         orig_dumps[-1] = close_self_closing_tags(
             orig_dumps[-1].replace(b'<html',
-                bytes('<html xmlns="%s"'%XHTML_NS), 1))
+                ('<html xmlns="%s"'%XHTML_NS).encode('ascii'), 1))
 
         # First pass: break up document into rendered strings of length no
         # more than CHUNK_SIZE
@@ -366,7 +366,7 @@ class Chunker(object):
         # The first number is an index into the chunk table and the second is
         # an offset from the start of the chunk to the start of the tag pointed
         # to by the link.
-        aid_map = {} # Map of aid to (fid, offset_from_start_of_chunk, offset_from_start_of_text)
+        aid_map = PolyglotDict() # Map of aid to (fid, offset_from_start_of_chunk, offset_from_start_of_text)
         for match in re.finditer(br'<[^>]+? [ac]id=[\'"]([cA-Z0-9]+)[\'"]', rebuilt_text):
             offset = match.start()
             pos_fid = None
@@ -395,9 +395,9 @@ class Chunker(object):
         def to_placeholder(aid):
             pos, fid, _ = aid_map[aid]
             pos, fid = to_base(pos, min_num_digits=4), to_href(fid)
-            return bytes(':off:'.join((pos, fid)))
+            return ':off:'.join((pos, fid)).encode('utf-8')
 
-        placeholder_map = {bytes(k):to_placeholder(v) for k, v in
+        placeholder_map = {k:to_placeholder(v) for k, v in
             iteritems(self.placeholder_map)}
 
         # Now update the links
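The `{bytes(k): ...}` normalization could be dropped precisely because aid_map is now a PolyglotDict: keys are byte-normalized inside the dict, so the lookup in `to_placeholder` succeeds whether the stored aid arrives as str or bytes. A sketch of that round-trip, assuming calibre is importable (the tuple values are illustrative):

    from calibre.ebooks.mobi.utils import PolyglotDict

    aid_map = PolyglotDict()
    aid_map[b'c12'] = (3, 7, 1024)  # scanned out of bytes markup

    pos, fid, _ = aid_map['c12']    # str key from placeholder_map works too
    assert (pos, fid) == (3, 7)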