From 3bbd277d2b95f2b539a11362a1be128bbb818de9 Mon Sep 17 00:00:00 2001 From: John Schember Date: Wed, 22 Apr 2009 07:30:22 -0400 Subject: [PATCH] ereader reader debug output --- src/calibre/ebooks/pdb/ereader/__init__.py | 3 --- src/calibre/ebooks/pdb/ereader/pmlconverter.py | 2 +- src/calibre/ebooks/pdb/ereader/reader.py | 18 ++++++++++++------ src/calibre/ebooks/pdb/header.py | 6 +++--- 4 files changed, 16 insertions(+), 13 deletions(-) diff --git a/src/calibre/ebooks/pdb/ereader/__init__.py b/src/calibre/ebooks/pdb/ereader/__init__.py index f2f1761cad..89d9dfdd35 100644 --- a/src/calibre/ebooks/pdb/ereader/__init__.py +++ b/src/calibre/ebooks/pdb/ereader/__init__.py @@ -1,8 +1,5 @@ # -*- coding: utf-8 -*- from __future__ import with_statement -''' -Write content to TXT. -''' __license__ = 'GPL v3' __copyright__ = '2009, John Schember ' diff --git a/src/calibre/ebooks/pdb/ereader/pmlconverter.py b/src/calibre/ebooks/pdb/ereader/pmlconverter.py index 454510f699..250b74eb56 100644 --- a/src/calibre/ebooks/pdb/ereader/pmlconverter.py +++ b/src/calibre/ebooks/pdb/ereader/pmlconverter.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import with_statement ''' -Convert pml markup to html +Convert pml markup to and from html ''' __license__ = 'GPL v3' diff --git a/src/calibre/ebooks/pdb/ereader/reader.py b/src/calibre/ebooks/pdb/ereader/reader.py index 9354787447..f6bbc3d23f 100644 --- a/src/calibre/ebooks/pdb/ereader/reader.py +++ b/src/calibre/ebooks/pdb/ereader/reader.py @@ -46,15 +46,16 @@ class HeaderRecord(object): # They don't exist if offset is larget than last_record. # Todo: Determine if the subtraction is necessary and find out # what _rec means. - self.num_footnote_pages = self.sidebar_offset - self.footnote_offset if self.footnote_offset < self.last_data_offset else 0 + end_footnote_offset = self.sidebar_offset if self.sidebar_offset != self.footnote_offset else self.last_data_offset + self.num_footnote_pages = end_footnote_offset - self.footnote_offset if self.footnote_offset < self.last_data_offset else 0 self.num_sidebar_pages = self.sidebar_offset - self.last_data_offset if self.footnote_offset < self.last_data_offset else 0 class Reader(object): def __init__(self, header, stream, log): - raw = stream.read() - + self.log = log + self.sections = [] for i in range(header.num_sections): self.sections.append(header.section_data(i)) @@ -91,19 +92,19 @@ class Reader(object): assumed to be encoded as Windows-1252. The encoding is part of the eReader file spec and should always be this encoding. ''' - if number < 1 or number > self.header_record.num_text_pages: + if number not in range(1, self.header_record.num_text_pages): return '' return self.decompress_text(number) def get_footnote_page(self, number): - if number < self.header_record.footnote_offset or number > self.header_record.footnote_offset + self.header_record.num_footnote_pages - 1: + if number not in range(self.header_record.footnote_offset, self.header_record.footnote_offset + self.header_record.num_footnote_pages): return '' return self.decompress_text(number) def get_sidebar_page(self, number): - if number < self.header_record.sidebar_offset or number > self.header_record.sidebar_offset + self.header_record.num_sidebar_pages - 1: + if number not in range(self.header_record.sidebar_offset, self.header_record.sidebar_offset + self.header_record.num_sidebar_pages - 1): return '' return self.decompress_text(number) @@ -139,6 +140,7 @@ class Reader(object): html = '' for i in range(1, self.header_record.num_text_pages + 1): + self.log.debug('Extracting text page %i' % i) html += pml_to_html(self.get_text_page(i)) # Untested: The num_.._pages variable may not be correct! @@ -147,11 +149,13 @@ class Reader(object): if has_footnotes(): html += '

%s

' % _('Footnotes') for i in range(self.header_record.footnote_offset, self.header_record.num_footnote_pages): + self.log.debug('Extracting footnote page %i' % i) html += footnote_to_html(self.get_footnote_page(i)) if has_sidebar(): html += '

%s

' % _('Sidebar') for i in range(self.header_record.sidebar_offset, self.header_record.num_sidebar_pages): + self.log.debug('Extracting sidebar page %i' % i) html += sidebar_to_html(self.get_sidebar_page(i)) ''' @@ -159,6 +163,7 @@ class Reader(object): with CurrentDir(output_dir): with open('index.html', 'wb') as index: + self.log.debug('Writing text to index.html') index.write(html.encode('utf-8')) if not os.path.exists(os.path.join(output_dir, 'images/')): @@ -169,6 +174,7 @@ class Reader(object): name, img = self.get_image(self.header_record.image_data_offset + i) images.append(name) with open(name, 'wb') as imgf: + self.log.debug('Writing image %s to images/' % name) imgf.write(img) opf_path = self.create_opf(output_dir, images) diff --git a/src/calibre/ebooks/pdb/header.py b/src/calibre/ebooks/pdb/header.py index a3aa56a718..efa727dac9 100644 --- a/src/calibre/ebooks/pdb/header.py +++ b/src/calibre/ebooks/pdb/header.py @@ -32,7 +32,7 @@ class PdbHeader(object): return self.stream.read(32).replace('\x00', '') def full_section_info(self, number): - if number > self.num_sections: + if number not in range(0, self.num_sections): raise ValueError('Not a valid section number %i' % number) self.stream.seek(78+number*8) @@ -41,14 +41,14 @@ class PdbHeader(object): return (offset, flags, val) def section_offset(self, number): - if number > self.num_sections: + if number not in range(0, self.num_sections): raise ValueError('Not a valid section number %i' % number) self.stream.seek(78+number*8) return struct.unpack('>LBBBB', self.stream.read(8))[0] def section_data(self, number): - if number > self.num_sections: + if number not in range(0, self.num_sections): raise ValueError('Not a valid section number %i' % number) start = self.section_offset(number)