mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix decoding text. Add internal link support.
This commit is contained in:
parent
0f3228e658
commit
acaa06de53
@ -263,7 +263,7 @@ class Reader(FormatReader):
|
|||||||
elif section_header.type == DATATYPE_METADATA:
|
elif section_header.type == DATATYPE_METADATA:
|
||||||
self.metadata_section_number = section_number
|
self.metadata_section_number = section_number
|
||||||
section = SectionMetadata(raw_data[start:])
|
section = SectionMetadata(raw_data[start:])
|
||||||
elif section_header.type == DATATYPE_COMPOSITE_IMAGE:
|
#elif section_header.type == DATATYPE_COMPOSITE_IMAGE:
|
||||||
|
|
||||||
|
|
||||||
self.sections.append((section_header, section))
|
self.sections.append((section_header, section))
|
||||||
@ -285,10 +285,10 @@ class Reader(FormatReader):
|
|||||||
for uid, num in self.uid_text_secion_number.items():
|
for uid, num in self.uid_text_secion_number.items():
|
||||||
section_header, section_data = self.sections[num]
|
section_header, section_data = self.sections[num]
|
||||||
if section_header.type == DATATYPE_PHTML:
|
if section_header.type == DATATYPE_PHTML:
|
||||||
html += self.process_phtml(section_data.header, section_data.data.decode(self.get_text_uid_encoding(section_header.uid), 'replace'))
|
html += self.process_phtml(section_data.header, section_data.data)
|
||||||
elif section_header.type == DATATYPE_PHTML_COMPRESSED:
|
elif section_header.type == DATATYPE_PHTML_COMPRESSED:
|
||||||
d = self.decompress_phtml(section_data.data).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
|
d = self.decompress_phtml(section_data.data)
|
||||||
html += self.process_phtml(section_data.header, d)
|
html += self.process_phtml(section_header.uid, section_data.header, d).decode(self.get_text_uid_encoding(section_header.uid), 'replace')
|
||||||
|
|
||||||
html += '</body></html>'
|
html += '</body></html>'
|
||||||
|
|
||||||
@ -300,7 +300,6 @@ class Reader(FormatReader):
|
|||||||
if not os.path.exists(os.path.join(output_dir, 'images/')):
|
if not os.path.exists(os.path.join(output_dir, 'images/')):
|
||||||
os.makedirs(os.path.join(output_dir, 'images/'))
|
os.makedirs(os.path.join(output_dir, 'images/'))
|
||||||
with CurrentDir(os.path.join(output_dir, 'images/')):
|
with CurrentDir(os.path.join(output_dir, 'images/')):
|
||||||
#im.read('/Users/john/Tmp/plkr/apnx.palm')
|
|
||||||
for uid, num in self.uid_image_section_number.items():
|
for uid, num in self.uid_image_section_number.items():
|
||||||
section_header, section_data = self.sections[num]
|
section_header, section_data = self.sections[num]
|
||||||
if section_data:
|
if section_data:
|
||||||
@ -340,10 +339,12 @@ class Reader(FormatReader):
|
|||||||
#from calibre.ebooks.compression.palmdoc import decompress_doc
|
#from calibre.ebooks.compression.palmdoc import decompress_doc
|
||||||
return decompress_doc(data)
|
return decompress_doc(data)
|
||||||
|
|
||||||
def process_phtml(self, sub_header, d):
|
def process_phtml(self, uid, sub_header, d):
|
||||||
html = u''
|
html = u'<a id="p%s" /><p id="p%s-0">' % (uid, uid)
|
||||||
offset = 0
|
offset = 0
|
||||||
paragraph_open = False
|
paragraph_open = True
|
||||||
|
need_set_p_id = False
|
||||||
|
p_num = 1
|
||||||
paragraph_offsets = []
|
paragraph_offsets = []
|
||||||
running_offset = 0
|
running_offset = 0
|
||||||
for size in sub_header.sizes:
|
for size in sub_header.sizes:
|
||||||
@ -352,7 +353,12 @@ class Reader(FormatReader):
|
|||||||
|
|
||||||
while offset < len(d):
|
while offset < len(d):
|
||||||
if not paragraph_open:
|
if not paragraph_open:
|
||||||
html += u'<p>'
|
if need_set_p_id:
|
||||||
|
html += u'<p id="p%s-%s">' % (uid, p_num)
|
||||||
|
p_num += 1
|
||||||
|
need_set_p_id = False
|
||||||
|
else:
|
||||||
|
html += u'<p>'
|
||||||
paragraph_open = True
|
paragraph_open = True
|
||||||
|
|
||||||
c = ord(d[offset])
|
c = ord(d[offset])
|
||||||
@ -363,26 +369,36 @@ class Reader(FormatReader):
|
|||||||
# 2 Bytes
|
# 2 Bytes
|
||||||
# record ID
|
# record ID
|
||||||
if c == 0x0a:
|
if c == 0x0a:
|
||||||
offset += 2
|
offset += 1
|
||||||
|
id = struct.unpack('>H', d[offset:offset+2])[0]
|
||||||
|
html += '<a href="#p%s">' % id
|
||||||
|
offset += 1
|
||||||
# Targeted page link begins
|
# Targeted page link begins
|
||||||
# 3 Bytes
|
# 3 Bytes
|
||||||
# record ID, target
|
# record ID, target
|
||||||
elif c == 0x0b:
|
elif c == 0x0b:
|
||||||
offset += 3
|
offset += 3
|
||||||
|
html += '<a>'
|
||||||
# Paragraph link begins
|
# Paragraph link begins
|
||||||
# 4 Bytes
|
# 4 Bytes
|
||||||
# record ID, paragraph number
|
# record ID, paragraph number
|
||||||
elif c == 0x0c:
|
elif c == 0x0c:
|
||||||
offset += 4
|
offset += 1
|
||||||
|
id = struct.unpack('>H', d[offset:offset+2])[0]
|
||||||
|
offset += 2
|
||||||
|
pid = struct.unpack('>H', d[offset:offset+2])[0]
|
||||||
|
html += '<a href="#p%s-%s">' % (id, pid)
|
||||||
|
offset += 1
|
||||||
# Targeted paragraph link begins
|
# Targeted paragraph link begins
|
||||||
# 5 Bytes
|
# 5 Bytes
|
||||||
# record ID, paragraph number, target
|
# record ID, paragraph number, target
|
||||||
elif c == 0x0d:
|
elif c == 0x0d:
|
||||||
offset += 5
|
offset += 5
|
||||||
|
html += '<a>'
|
||||||
# Link ends
|
# Link ends
|
||||||
# 0 Bytes
|
# 0 Bytes
|
||||||
elif c == 0x08:
|
elif c == 0x08:
|
||||||
pass
|
html += '</a>'
|
||||||
# Set font
|
# Set font
|
||||||
# 1 Bytes
|
# 1 Bytes
|
||||||
# font specifier
|
# font specifier
|
||||||
@ -515,10 +531,11 @@ class Reader(FormatReader):
|
|||||||
html += unichr(c)
|
html += unichr(c)
|
||||||
offset += 1
|
offset += 1
|
||||||
if offset in paragraph_offsets:
|
if offset in paragraph_offsets:
|
||||||
|
need_set_p_id = True
|
||||||
if paragraph_open:
|
if paragraph_open:
|
||||||
html += u'</p>\n'
|
html += u'</p>\n'
|
||||||
paragraph_open = False
|
paragraph_open = False
|
||||||
|
|
||||||
if paragraph_open:
|
if paragraph_open:
|
||||||
html += u'</p>'
|
html += u'</p>'
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user