mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Fix bug 2342. ereader inspector script to aid in implementing writer. ereader writer tweaks.
This commit is contained in:
parent
1daf7bd86a
commit
ccdb992992
@ -95,6 +95,18 @@ class HTMLPreProcessor(object):
|
||||
|
||||
# Fix pdftohtml markup
|
||||
PDFTOHTML = [
|
||||
# Fix umlauts
|
||||
(re.compile(u'¨\s*(<br.*?>)*\s*o', re.UNICODE), lambda match: u'ö'),
|
||||
(re.compile(u'¨\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ö'),
|
||||
(re.compile(u'¨\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ü'),
|
||||
(re.compile(u'¨\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ü'),
|
||||
(re.compile(u'¨\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'ë'),
|
||||
(re.compile(u'¨\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'Ë'),
|
||||
(re.compile(u'¨\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'ï'),
|
||||
(re.compile(u'¨\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Ï'),
|
||||
(re.compile(u'¨\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ä'),
|
||||
(re.compile(u'¨\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ä'),
|
||||
|
||||
# Remove page links
|
||||
(re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
|
||||
# Remove <hr> tags
|
||||
|
87
src/calibre/ebooks/pdb/ereader/inspector.py
Normal file
87
src/calibre/ebooks/pdb/ereader/inspector.py
Normal file
@ -0,0 +1,87 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
'''
|
||||
Inspect the header of ereader files. This is primarily used for debugging.
|
||||
'''
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||
__docformat__ = 'restructuredtext en'
|
||||
|
||||
import struct, sys
|
||||
|
||||
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||
from calibre.ebooks.pdb.ereader.reader import HeaderRecord
|
||||
|
||||
def pdb_header_info(header):
    '''
    Print the generic PDB header fields of a file.

    :param header: object exposing the ``ident``, ``num_sections`` and
        ``title`` attributes (``main`` passes a ``PdbHeaderReader``).
    '''
    # print(...) with exactly one argument gives the same output whether it
    # is parsed as a Python 2 statement or called as a Python 3 function.
    print('PDB Header Info:')
    print('')
    print('Identity: %s' % header.ident)
    print('Total Sections: %s' % header.num_sections)
    print('Title: %s' % header.title)
    print('')
|
||||
|
||||
def ereader_header_info(header):
    '''
    Print the 16-bit fields of eReader record 0 (the eReader header).

    Record 0 is fetched with ``header.section_data(0)``. Its first 54 bytes
    are shown as 27 consecutive big-endian unsigned shorts, one per line,
    each prefixed with its byte range and, where one is known, a field name.

    If the record is shorter than 54 bytes the fields that are present are
    printed, followed by a note, instead of aborting with ``struct.error``
    part way through the listing.

    :param header: object providing ``section_data(index)``.
    '''
    # Names of the fields whose meaning is known, keyed by starting offset.
    field_names = {
        0: 'Version',
        12: 'Non-Text',
        28: 'footnote_rec',
        30: 'sidebar_rec',
        32: 'bookmark_offset',
        40: 'image_data_offset',
        44: 'metadata_offset',
        48: 'footnote_offset',
        50: 'sidebar_offset',
        52: 'last_data_offset',
    }

    h0 = header.section_data(0)

    print('Ereader Record 0 (Header) Info:')
    print('')
    for start in range(0, 54, 2):
        end = start + 2
        raw = h0[start:end]
        if len(raw) < 2:
            # Slicing past the end yields a short chunk; report it rather
            # than letting struct.unpack raise.
            print('Record 0 is only %i bytes long; fields from offset %i on are missing.'
                  % (len(h0), start))
            break
        label = '%i-%i' % (start, end)
        name = field_names.get(start)
        if name:
            label = '%s %s' % (label, name)
        print('%s: %i' % (label, struct.unpack('>H', raw)[0]))
    print('')
|
||||
|
||||
def section_lengths(header, max_size=65505):
    '''
    Print the byte length of every section, flagging oversized ones.

    One line is printed per section index in ``range(header.section_count())``.
    Sections whose data is longer than ``max_size`` bytes are marked with
    ``<--- Over!``.

    :param header: object providing ``section_count()`` and
        ``section_data(index)``.
    :param max_size: largest section length, in bytes, that is not flagged.
        Defaults to 65505, the threshold this script has always used.
    '''
    print('Section Sizes')
    print('')

    for i in range(header.section_count()):
        size = len(header.section_data(i))
        message = '<--- Over!' if size > max_size else ''
        # The space before %s is kept even when message is empty so the
        # output matches what the tool has always produced.
        print('Section %i: %i %s' % (i, size, message))
|
||||
|
||||
def main(args=sys.argv):
    '''
    Command line entry point: dump header information for an eReader PDB.

    :param args: argument list in ``sys.argv`` layout; ``args[1]`` is the
        path of the file to inspect.
    :return: 0 on success, 1 when no input file was given.
    '''
    if len(args) < 2:
        print('Error: requires input file.')
        return 1

    # Open the path from ``args`` (not ``sys.argv``) so callers that pass
    # their own argument list are honoured. The file stays open while the
    # header object is in use and is closed afterwards, even on error.
    with open(args[1], 'rb') as f:
        pheader = PdbHeaderReader(f)

        pdb_header_info(pheader)
        ereader_header_info(pheader)
        section_lengths(pheader)

    return 0
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
@ -27,7 +27,7 @@ class Writer(object):
|
||||
|
||||
hr = [self._header_record(len(text), len(images))]
|
||||
|
||||
sections = hr+text+images+metadata
|
||||
sections = hr+text+images+metadata+['MeTaInFo\x00']
|
||||
|
||||
lengths = [len(i) for i in sections]
|
||||
|
||||
@ -82,7 +82,7 @@ class Writer(object):
|
||||
if image_items > 0:
|
||||
image_data_offset = text_items + 1
|
||||
meta_data_offset = image_data_offset + image_items
|
||||
last_data_offset = meta_data_offset + 1
|
||||
last_data_offset = meta_data_offset + 2
|
||||
else:
|
||||
meta_data_offset = text_items + 1
|
||||
last_data_offset = meta_data_offset + 1
|
||||
@ -90,6 +90,35 @@ class Writer(object):
|
||||
|
||||
record = u''
|
||||
|
||||
record += struct.pack('>H', version) # [0:2]
|
||||
record += struct.pack('>H', 0) # [2:4]
|
||||
record += struct.pack('>H', 0) # [4:6]
|
||||
record += struct.pack('>H', 25152) # [6:8] # 25152 is MAGIC
|
||||
record += struct.pack('>H', last_data_offset) # [8:10]
|
||||
record += struct.pack('>H', last_data_offset) # [10:12]
|
||||
record += struct.pack('>H', non_text_offset) # [12:14] # non_text_offset
|
||||
record += struct.pack('>H', non_text_offset) # [14:16]
|
||||
record += struct.pack('>H', 1) # [16:18]
|
||||
record += struct.pack('>H', 1) # [18:20]
|
||||
record += struct.pack('>H', 0) # [20:22]
|
||||
record += struct.pack('>H', 1) # [22:24]
|
||||
record += struct.pack('>H', 1) # [24:26]
|
||||
record += struct.pack('>H', 0) # [26:28]
|
||||
record += struct.pack('>H', 0) # [28:30] # footnote_rec
|
||||
record += struct.pack('>H', 0) # [30:32] # sidebar_rec
|
||||
record += struct.pack('>H', last_data_offset) # [32:34] # bookmark_offset
|
||||
record += struct.pack('>H', 2560) # [34:36] # 2560 is MAGIC
|
||||
record += struct.pack('>H', non_text_offset) # [36:38]
|
||||
record += struct.pack('>H', non_text_offset + 1) # [38:40]
|
||||
record += struct.pack('>H', image_data_offset) # [40:42]
|
||||
record += struct.pack('>H', image_data_offset) # [42:44]
|
||||
record += struct.pack('>H', meta_data_offset) # [44:46]
|
||||
record += struct.pack('>H', meta_data_offset) # [46:48]
|
||||
record += struct.pack('>H', last_data_offset) # [48:50] # footnote_offset
|
||||
record += struct.pack('>H', last_data_offset) # [50:52] # sidebar_offset
|
||||
record += struct.pack('>H', last_data_offset) # [52:54] # last_data_offset
|
||||
|
||||
'''
|
||||
# Version
|
||||
record += struct.pack('>H', version)
|
||||
record = record.ljust(12, '\x00')
|
||||
@ -112,6 +141,6 @@ class Writer(object):
|
||||
record += struct.pack('>H', last_data_offset)
|
||||
record = record.ljust(52, '\x00')
|
||||
record += struct.pack('>H', last_data_offset)
|
||||
|
||||
'''
|
||||
return record
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user