ereader reader debug output

This commit is contained in:
John Schember 2009-04-22 07:30:22 -04:00
parent e968f529da
commit 3bbd277d2b
4 changed files with 16 additions and 13 deletions

View File

@ -1,8 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
'''
Write content to TXT.
'''
__license__ = 'GPL v3'
__copyright__ = '2009, John Schember <john@nachtimwald.com>'

View File

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
from __future__ import with_statement
'''
Convert pml markup to html
Convert pml markup to and from html
'''
__license__ = 'GPL v3'

View File

@ -46,15 +46,16 @@ class HeaderRecord(object):
# They don't exist if offset is larger than last_record.
# Todo: Determine if the subtraction is necessary and find out
# what _rec means.
self.num_footnote_pages = self.sidebar_offset - self.footnote_offset if self.footnote_offset < self.last_data_offset else 0
end_footnote_offset = self.sidebar_offset if self.sidebar_offset != self.footnote_offset else self.last_data_offset
self.num_footnote_pages = end_footnote_offset - self.footnote_offset if self.footnote_offset < self.last_data_offset else 0
self.num_sidebar_pages = self.sidebar_offset - self.last_data_offset if self.footnote_offset < self.last_data_offset else 0
class Reader(object):
def __init__(self, header, stream, log):
raw = stream.read()
self.log = log
self.sections = []
for i in range(header.num_sections):
self.sections.append(header.section_data(i))
@ -91,19 +92,19 @@ class Reader(object):
assumed to be encoded as Windows-1252. The encoding is part of
the eReader file spec and should always be this encoding.
'''
if number < 1 or number > self.header_record.num_text_pages:
if number not in range(1, self.header_record.num_text_pages):
return ''
return self.decompress_text(number)
def get_footnote_page(self, number):
if number < self.header_record.footnote_offset or number > self.header_record.footnote_offset + self.header_record.num_footnote_pages - 1:
if number not in range(self.header_record.footnote_offset, self.header_record.footnote_offset + self.header_record.num_footnote_pages):
return ''
return self.decompress_text(number)
def get_sidebar_page(self, number):
if number < self.header_record.sidebar_offset or number > self.header_record.sidebar_offset + self.header_record.num_sidebar_pages - 1:
if number not in range(self.header_record.sidebar_offset, self.header_record.sidebar_offset + self.header_record.num_sidebar_pages - 1):
return ''
return self.decompress_text(number)
@ -139,6 +140,7 @@ class Reader(object):
html = '<html><head><title></title></head><body>'
for i in range(1, self.header_record.num_text_pages + 1):
self.log.debug('Extracting text page %i' % i)
html += pml_to_html(self.get_text_page(i))
# Untested: The num_.._pages variable may not be correct!
@ -147,11 +149,13 @@ class Reader(object):
if has_footnotes():
html += '<br /><h1>%s</h1>' % _('Footnotes')
for i in range(self.header_record.footnote_offset, self.header_record.num_footnote_pages):
self.log.debug('Extracting footnote page %i' % i)
html += footnote_to_html(self.get_footnote_page(i))
if has_sidebar():
html += '<br /><h1>%s</h1>' % _('Sidebar')
for i in range(self.header_record.sidebar_offset, self.header_record.num_sidebar_pages):
self.log.debug('Extracting sidebar page %i' % i)
html += sidebar_to_html(self.get_sidebar_page(i))
'''
@ -159,6 +163,7 @@ class Reader(object):
with CurrentDir(output_dir):
with open('index.html', 'wb') as index:
self.log.debug('Writing text to index.html')
index.write(html.encode('utf-8'))
if not os.path.exists(os.path.join(output_dir, 'images/')):
@ -169,6 +174,7 @@ class Reader(object):
name, img = self.get_image(self.header_record.image_data_offset + i)
images.append(name)
with open(name, 'wb') as imgf:
self.log.debug('Writing image %s to images/' % name)
imgf.write(img)
opf_path = self.create_opf(output_dir, images)

View File

@ -32,7 +32,7 @@ class PdbHeader(object):
return self.stream.read(32).replace('\x00', '')
def full_section_info(self, number):
if number > self.num_sections:
if number not in range(0, self.num_sections):
raise ValueError('Not a valid section number %i' % number)
self.stream.seek(78+number*8)
@ -41,14 +41,14 @@ class PdbHeader(object):
return (offset, flags, val)
def section_offset(self, number):
if number > self.num_sections:
if number not in range(0, self.num_sections):
raise ValueError('Not a valid section number %i' % number)
self.stream.seek(78+number*8)
return struct.unpack('>LBBBB', self.stream.read(8))[0]
def section_data(self, number):
if number > self.num_sections:
if number not in range(0, self.num_sections):
raise ValueError('Not a valid section number %i' % number)
start = self.section_offset(number)