This commit is contained in:
Kovid Goyal 2011-08-04 11:13:25 -06:00
parent f71cad4339
commit 4192578a7c
2 changed files with 15 additions and 9 deletions

View File

@@ -1024,8 +1024,14 @@ class IndexRecord(object): # {{{
for entry in self.indices: for entry in self.indices:
offset = entry.offset offset = entry.offset
a(str(entry)) a(str(entry))
t = self.alltext
if offset is not None and self.alltext is not None: if offset is not None and self.alltext is not None:
a('\tHTML at offset: %r'%self.alltext[offset:offset+100]) a('\tHTML before offset: %r'%t[offset-50:offset])
a('\tHTML after offset: %r'%t[offset:offset+50])
p = offset+entry.size
a('\tHTML before end: %r'%t[p-50:p])
a('\tHTML after end: %r'%t[p:p+50])
a('') a('')
return '\n'.join(ans) return '\n'.join(ans)
@@ -1053,11 +1059,12 @@ class CNCX(object): # {{{
self.records[pos+record_offset] = raw[ self.records[pos+record_offset] = raw[
pos+consumed:pos+consumed+length].decode(codec) pos+consumed:pos+consumed+length].decode(codec)
except: except:
byts = raw[pos+consumed:pos+consumed+length] byts = raw[pos:]
r = format_bytes(byts) r = format_bytes(byts)
print ('CNCX entry at offset %d has unknown format %s'%( print ('CNCX entry at offset %d has unknown format %s'%(
pos+record_offset, r)) pos+record_offset, r))
self.records[pos+record_offset] = r self.records[pos+record_offset] = r
pos = len(raw)
pos += consumed+length pos += consumed+length
record_offset += 0x10000 record_offset += 0x10000

View File

@@ -110,6 +110,7 @@ class Serializer(object):
self.serialize_head() self.serialize_head()
self.serialize_body() self.serialize_body()
buf.write(b'</html>') buf.write(b'</html>')
self.end_offset = buf.tell()
self.fixup_links() self.fixup_links()
return buf.getvalue() return buf.getvalue()
@@ -206,20 +207,18 @@ class Serializer(object):
self.breaks.append(buf.tell() - 1) self.breaks.append(buf.tell() - 1)
self.id_offsets[urlnormalize(item.href)] = buf.tell() self.id_offsets[urlnormalize(item.href)] = buf.tell()
if item.is_section_start: if item.is_section_start:
buf.write(b'<div>') buf.write(b'<a ></a> ')
if item.is_article_start: if item.is_article_start:
buf.write(b'<div>') buf.write(b'<a ></a> <a ></a>')
for elem in item.data.find(XHTML('body')): for elem in item.data.find(XHTML('body')):
self.serialize_elem(elem, item) self.serialize_elem(elem, item)
if item.is_article_end:
# Kindle periodical article end marker
buf.write(b'<div></div>')
if self.write_page_breaks_after_item: if self.write_page_breaks_after_item:
buf.write(b'<mbp:pagebreak/>') buf.write(b'<mbp:pagebreak/>')
if item.is_article_end: if item.is_article_end:
buf.write(b'</div>') # Kindle periodical article end marker
buf.write(b'<a ></a> <a ></a>')
if item.is_section_end: if item.is_section_end:
buf.write(b'</div>') buf.write(b' <a ></a>')
self.anchor_offset = None self.anchor_offset = None
def serialize_elem(self, elem, item, nsrmap=NSRMAP): def serialize_elem(self, elem, item, nsrmap=NSRMAP):