diff --git a/src/calibre/ebooks/pdb/ereader/reader132.py b/src/calibre/ebooks/pdb/ereader/reader132.py index 98dbe13790..49fdfb8980 100644 --- a/src/calibre/ebooks/pdb/ereader/reader132.py +++ b/src/calibre/ebooks/pdb/ereader/reader132.py @@ -34,7 +34,6 @@ class HeaderRecord(object): self.has_metadata, = struct.unpack('>H', raw[24:26]) self.footnote_rec, = struct.unpack('>H', raw[28:30]) self.sidebar_rec, = struct.unpack('>H', raw[30:32]) - self.bookmark_offset, = struct.unpack('>H', raw[32:34]) self.image_data_offset, = struct.unpack('>H', raw[40:42]) self.metadata_offset, = struct.unpack('>H', raw[44:46]) self.footnote_offset, = struct.unpack('>H', raw[48:50]) diff --git a/src/calibre/ebooks/pdb/ereader/writer.py b/src/calibre/ebooks/pdb/ereader/writer.py index 2f4e3bf16f..8a88c6a689 100644 --- a/src/calibre/ebooks/pdb/ereader/writer.py +++ b/src/calibre/ebooks/pdb/ereader/writer.py @@ -28,7 +28,7 @@ IDENTITY = 'PNRdPPrs' # This is an arbitrary number that is small enough to work. The actual maximum # record size is unknown. -MAX_RECORD_SIZE = 3560 +MAX_RECORD_SIZE = 8192 class Writer(FormatWriter): @@ -37,13 +37,28 @@ class Writer(FormatWriter): self.log = log def write_content(self, oeb_book, out_stream, metadata=None): - text, image_hrefs = self._text(oeb_book) + text, image_hrefs, text_sizes = self._text(oeb_book) images = self._images(oeb_book.manifest, image_hrefs) metadata = [self._metadata(metadata)] - hr = [self._header_record(len(text), len(images))] - sections = hr+text+images+metadata+['MeTaInFo\x00'] + ''' + Record order as generated by Dropbook. + 1. eReader Header + 2. Compressed text + 3. Small font page index + 4. Large font page index + 5. Chapter index + 6. Links index + 7. Images + 8. (Extrapolation: there should be one more record type here though yet uncovered what it might be). + 9. Metadata + 10. Sidebar records + 11. Footnote records + 12. Text block size record + 13. "MeTaInFo\x00" word record + ''' + sections = hr+text+images+metadata+[text_sizes]+['MeTaInFo\x00'] lengths = [len(i) if i not in images else len(i[0]) + len(i[1]) for i in sections] @@ -62,12 +77,38 @@ class Writer(FormatWriter): pml = unicode(pmlmlizer.extract_content(oeb_book, self.opts)).encode('cp1252', 'replace') pml_pages = [] - for i in range(0, (len(pml) / MAX_RECORD_SIZE) + 1): - pml_pages.append(zlib.compress(pml[i * MAX_RECORD_SIZE : (i * MAX_RECORD_SIZE) + MAX_RECORD_SIZE])) + text_sizes = '' + index = 0 + while index < len(pml): + ''' + Split on the space character closest to MAX_RECORD_SIZE when possible. + ''' + split = pml.rfind(' ', index, MAX_RECORD_SIZE) + if split == -1: + len_end = len(pml[index:]) + if len_end > MAX_RECORD_SIZE: + split = MAX_RECORD_SIZE + else: + split = len_end + if split == 0: + split = 1 + pml_pages.append(zlib.compress(pml[index:index+split])) + text_sizes += struct.pack('>H', split) + index += split - return pml_pages, pmlmlizer.image_hrefs + return pml_pages, pmlmlizer.image_hrefs, text_sizes def _images(self, manifest, image_hrefs): + ''' + Image format. + + 0-4 : 'PNG '. There must be a space after PNG. + 4-36 : Image name. Must be exactly 32 bytes long. Pad with \x00 for names shorter than 32 bytes + 36-58 : Unknown. + 58-60 : Width. + 60-62 : Height. + 62-...: Raw image data in 8 bit PNG format. + ''' images = [] for item in manifest: @@ -82,6 +123,8 @@ class Writer(FormatWriter): header = 'PNG ' header += image_hrefs[item.href].ljust(32, '\x00')[:32] + header = header.ljust(58, '\x00') + header += struct.pack('>HH', im.size[0], im.size[1]) header = header.ljust(62, '\x00') if len(data) + len(header) < 65505: @@ -126,7 +169,7 @@ class Writer(FormatWriter): text_items = the number of text pages image_items = the number of images ''' - version = 10 # Zlib compression + compression = 10 # zlib compression. non_text_offset = text_items + 1 if image_items > 0: @@ -140,33 +183,33 @@ class Writer(FormatWriter): record = '' - record += struct.pack('>H', version) # [0:2] # Version. Specifies compression and drm. 2 = palmdoc, 10 = zlib. 260 and 272 = DRM - record += struct.pack('>H', 0) # [2:4] - record += struct.pack('>H', 0) # [4:6] + record += struct.pack('>H', compression) # [0:2] # Compression. Specifies compression and drm. 2 = palmdoc, 10 = zlib. 260 and 272 = DRM + record += struct.pack('>H', 0) # [2:4] # Unknown. + record += struct.pack('>H', 0) # [4:6] # Unknown. record += struct.pack('>H', 25152) # [6:8] # 25152 is MAGIC. Somehow represents the cp1252 encoding of the text - record += struct.pack('>H', 0) # [8:10] - record += struct.pack('>H', 0) # [10:12] - record += struct.pack('>H', non_text_offset) # [12:14] # non_text_offset - record += struct.pack('>H', 0) # [14:16] - record += struct.pack('>H', 0) # [16:18] - record += struct.pack('>H', 0) # [18:20] - record += struct.pack('>H', image_items) # [20:22] # Number of images - record += struct.pack('>H', 0) # [22:24] - record += struct.pack('>H', 1) # [24:26] # 1 if has metadata, 0 if not - record += struct.pack('>H', 0) # [26:28] - record += struct.pack('>H', 0) # [28:30] # footnote_rec - record += struct.pack('>H', 0) # [30:32] # sidebar_rec - record += struct.pack('>H', last_data_offset) # [32:34] # bookmark_offset - record += struct.pack('>H', 2560) # [34:36] # 2560 is MAGIC - record += struct.pack('>H', 0) # [36:38] - record += struct.pack('>H', 0) # [38:40] - record += struct.pack('>H', image_data_offset) # [40:42] # image_data_offset. This will be the last data offset if there are no images - record += struct.pack('>H', 0) # [42:44] - record += struct.pack('>H', meta_data_offset) # [44:46] # meta_data_offset. This will be the last data offset if there are no images - record += struct.pack('>H', 0) # [46:48] - record += struct.pack('>H', last_data_offset) # [48:50] # footnote_offset. This will be the last data offset if there are no images - record += struct.pack('>H', last_data_offset) # [50:52] # sidebar_offset. This will be the last data offset if there are no images - record += struct.pack('>H', last_data_offset) # [52:54] # last_data_offset + record += struct.pack('>H', 0) # [8:10] # Number of small font pages. 0 if page index is not built. + record += struct.pack('>H', 0) # [10:12] # Number of large font pages. 0 if page index is not built. + record += struct.pack('>H', non_text_offset) # [12:14] # Non-Text record start. + record += struct.pack('>H', 0) # [14:16] # Number of chapter index records. + record += struct.pack('>H', 0) # [16:18] # Number of small font page index records. + record += struct.pack('>H', 0) # [18:20] # Number of large font page index records. + record += struct.pack('>H', image_items) # [20:22] # Number of images. + record += struct.pack('>H', 0) # [22:24] # Number of links. + record += struct.pack('>H', 1) # [24:26] # 1 if has metadata, 0 if not. + record += struct.pack('>H', 0) # [26:28] # Unknown. + record += struct.pack('>H', 0) # [28:30] # Number of Footnotes. + record += struct.pack('>H', 0) # [30:32] # Number of Sidebars. + record += struct.pack('>H', last_data_offset) # [32:34] # Chapter index offset. + record += struct.pack('>H', 2560) # [34:36] # 2560 is MAGIC. + record += struct.pack('>H', last_data_offset) # [36:38] # Small font page offset. This will be the last data offset if there are none. + record += struct.pack('>H', last_data_offset) # [38:40] # Large font page offset. This will be the last data offset if there are none. + record += struct.pack('>H', image_data_offset) # [40:42] # Image offset. This will be the last data offset if there are none. + record += struct.pack('>H', image_data_offset) # [42:44] # Links offset. This will be the last data offset if there are none. + record += struct.pack('>H', meta_data_offset) # [44:46] # Metadata offset. This will be the last data offset if there are none. + record += struct.pack('>H', 0) # [46:48] # Unknown. + record += struct.pack('>H', last_data_offset) # [48:50] # Footnote offset. This will be the last data offset if there are none. + record += struct.pack('>H', last_data_offset) # [50:52] # Sidebar offset. This will be the last data offset if there are none. + record += struct.pack('>H', last_data_offset) # [52:54] # Last data offset. for i in range(54, 132, 2): record += struct.pack('>H', 0) # [54:132] diff --git a/src/calibre/ebooks/pml/pmlconverter.py b/src/calibre/ebooks/pml/pmlconverter.py index b4ab238da9..ca7721350c 100644 --- a/src/calibre/ebooks/pml/pmlconverter.py +++ b/src/calibre/ebooks/pml/pmlconverter.py @@ -64,7 +64,7 @@ PML_HTML_RULES = [ (re.compile(r'(?<=[^\\])\\Sp'), lambda match: ''), (re.compile(r'(?<=[^\\])\\Sb'), lambda match: ''), # Remove invalid single item pml codes. - (re.compile(r'(?<=[^\\])\\.'), lambda match: ''), + (re.compile(r'(?<=[^\\])\\[^\\]'), lambda match: ''), # Replace \\ with \. (re.compile(r'\\\\'), lambda match: '\\'), @@ -78,6 +78,7 @@ def pml_to_html(pml): return html def footnote_sidebar_to_html(id, pml): + if id.startswith('\x01'): + id = id[2:] html = '