mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
cleanup.
This commit is contained in:
parent
c0cf0e91d4
commit
377313df7d
@ -183,6 +183,9 @@ class SectionHeaderText(object):
|
|||||||
def __init__(self, section_header, raw):
|
def __init__(self, section_header, raw):
|
||||||
# The uncompressed size of each paragraph.
|
# The uncompressed size of each paragraph.
|
||||||
self.sizes = []
|
self.sizes = []
|
||||||
|
# uncompressed offset of each paragraph starting
|
||||||
|
# at the beginning of the PHTML.
|
||||||
|
self.paragraph_offsets = []
|
||||||
# Paragraph attributes.
|
# Paragraph attributes.
|
||||||
self.attributes = []
|
self.attributes = []
|
||||||
|
|
||||||
@ -191,6 +194,11 @@ class SectionHeaderText(object):
|
|||||||
self.sizes.append(struct.unpack('>H', raw[adv:2+adv])[0])
|
self.sizes.append(struct.unpack('>H', raw[adv:2+adv])[0])
|
||||||
self.attributes.append(struct.unpack('>H', raw[2+adv:4+adv])[0])
|
self.attributes.append(struct.unpack('>H', raw[2+adv:4+adv])[0])
|
||||||
|
|
||||||
|
running_offset = 0
|
||||||
|
for size in self.sizes:
|
||||||
|
running_offset += size
|
||||||
|
self.paragraph_offsets.append(running_offset)
|
||||||
|
|
||||||
|
|
||||||
class SectionMetadata(object):
|
class SectionMetadata(object):
|
||||||
'''
|
'''
|
||||||
@ -299,6 +307,7 @@ class Reader(FormatReader):
|
|||||||
* UTF 16 and 32 characters.
|
* UTF 16 and 32 characters.
|
||||||
* Margins.
|
* Margins.
|
||||||
* Alignment.
|
* Alignment.
|
||||||
|
* Font color.
|
||||||
* DATATYPE_MAILTO
|
* DATATYPE_MAILTO
|
||||||
* DATATYPE_TABLE(_COMPRESSED)
|
* DATATYPE_TABLE(_COMPRESSED)
|
||||||
* DATATYPE_EXT_ANCHOR_INDEX
|
* DATATYPE_EXT_ANCHOR_INDEX
|
||||||
@ -381,13 +390,13 @@ class Reader(FormatReader):
|
|||||||
html = u'<html><body>'
|
html = u'<html><body>'
|
||||||
section_header, section_data = self.sections[num]
|
section_header, section_data = self.sections[num]
|
||||||
if section_header.type == DATATYPE_PHTML:
|
if section_header.type == DATATYPE_PHTML:
|
||||||
html += self.process_phtml(section_data.header, section_data.data)
|
html += self.process_phtml(section_data.data, section_data.header.paragraph_offsets)
|
||||||
elif section_header.type == DATATYPE_PHTML_COMPRESSED:
|
elif section_header.type == DATATYPE_PHTML_COMPRESSED:
|
||||||
d = self.decompress_phtml(section_data.data)
|
d = self.decompress_phtml(section_data.data)
|
||||||
html += self.process_phtml(section_data.header, d).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
|
html += self.process_phtml(d, section_data.header.paragraph_offsets).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
|
||||||
html += '</body></html>'
|
html += '</body></html>'
|
||||||
htmlf.write(html.encode('utf-8'))
|
htmlf.write(html.encode('utf-8'))
|
||||||
|
|
||||||
# Images.
|
# Images.
|
||||||
# Cache the image sizes in case they are used by a composite image.
|
# Cache the image sizes in case they are used by a composite image.
|
||||||
image_sizes = {}
|
image_sizes = {}
|
||||||
@ -498,7 +507,7 @@ class Reader(FormatReader):
|
|||||||
#from calibre.ebooks.compression.palmdoc import decompress_doc
|
#from calibre.ebooks.compression.palmdoc import decompress_doc
|
||||||
return decompress_doc(data)
|
return decompress_doc(data)
|
||||||
|
|
||||||
def process_phtml(self, sub_header, d):
|
def process_phtml(self, d, paragraph_offsets=[]):
|
||||||
html = u'<p id="p0">'
|
html = u'<p id="p0">'
|
||||||
offset = 0
|
offset = 0
|
||||||
paragraph_open = True
|
paragraph_open = True
|
||||||
@ -506,11 +515,6 @@ class Reader(FormatReader):
|
|||||||
need_set_p_id = False
|
need_set_p_id = False
|
||||||
p_num = 1
|
p_num = 1
|
||||||
font_specifier_close = ''
|
font_specifier_close = ''
|
||||||
paragraph_offsets = []
|
|
||||||
running_offset = 0
|
|
||||||
for size in sub_header.sizes:
|
|
||||||
running_offset += size
|
|
||||||
paragraph_offsets.append(running_offset)
|
|
||||||
|
|
||||||
while offset < len(d):
|
while offset < len(d):
|
||||||
if not paragraph_open:
|
if not paragraph_open:
|
||||||
@ -754,4 +758,7 @@ class Reader(FormatReader):
|
|||||||
return html
|
return html
|
||||||
|
|
||||||
def get_text_uid_encoding(self, uid):
|
def get_text_uid_encoding(self, uid):
|
||||||
|
# Return the user specified input encoding,
|
||||||
|
# otherwise return the alternate encoding specified for the uid,
|
||||||
|
# otherwise return the default encoding for the document.
|
||||||
return self.options.input_encoding if self.options.input_encoding else self.uid_text_secion_encoding.get(uid, self.default_encoding)
|
return self.options.input_encoding if self.options.input_encoding else self.uid_text_secion_encoding.get(uid, self.default_encoding)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user