This commit is contained in:
Kovid Goyal 2011-08-04 11:13:25 -06:00
parent f71cad4339
commit 4192578a7c
2 changed files with 15 additions and 9 deletions

View File

@@ -1024,8 +1024,14 @@ class IndexRecord(object): # {{{
for entry in self.indices:
offset = entry.offset
a(str(entry))
t = self.alltext
if offset is not None and self.alltext is not None:
a('\tHTML at offset: %r'%self.alltext[offset:offset+100])
a('\tHTML before offset: %r'%t[offset-50:offset])
a('\tHTML after offset: %r'%t[offset:offset+50])
p = offset+entry.size
a('\tHTML before end: %r'%t[p-50:p])
a('\tHTML after end: %r'%t[p:p+50])
a('')
return '\n'.join(ans)
@@ -1053,11 +1059,12 @@ class CNCX(object): # {{{
self.records[pos+record_offset] = raw[
pos+consumed:pos+consumed+length].decode(codec)
except:
byts = raw[pos+consumed:pos+consumed+length]
byts = raw[pos:]
r = format_bytes(byts)
print ('CNCX entry at offset %d has unknown format %s'%(
pos+record_offset, r))
self.records[pos+record_offset] = r
pos = len(raw)
pos += consumed+length
record_offset += 0x10000

View File

@@ -110,6 +110,7 @@ class Serializer(object):
self.serialize_head()
self.serialize_body()
buf.write(b'</html>')
self.end_offset = buf.tell()
self.fixup_links()
return buf.getvalue()
@@ -206,20 +207,18 @@ class Serializer(object):
self.breaks.append(buf.tell() - 1)
self.id_offsets[urlnormalize(item.href)] = buf.tell()
if item.is_section_start:
buf.write(b'<div>')
buf.write(b'<a ></a> ')
if item.is_article_start:
buf.write(b'<div>')
buf.write(b'<a ></a> <a ></a>')
for elem in item.data.find(XHTML('body')):
self.serialize_elem(elem, item)
if item.is_article_end:
# Kindle periodical article end marker
buf.write(b'<div></div>')
if self.write_page_breaks_after_item:
buf.write(b'<mbp:pagebreak/>')
if item.is_article_end:
buf.write(b'</div>')
# Kindle periodical article end marker
buf.write(b'<a ></a> <a ></a>')
if item.is_section_end:
buf.write(b'</div>')
buf.write(b' <a ></a>')
self.anchor_offset = None
def serialize_elem(self, elem, item, nsrmap=NSRMAP):