mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Fix bug 2342. ereader inspector script to aid in implementing writer. ereader writer tweaks.
This commit is contained in:
parent
1daf7bd86a
commit
ccdb992992
@ -95,6 +95,18 @@ class HTMLPreProcessor(object):
|
|||||||
|
|
||||||
# Fix pdftohtml markup
|
# Fix pdftohtml markup
|
||||||
PDFTOHTML = [
|
PDFTOHTML = [
|
||||||
|
# Fix umlauts
|
||||||
|
(re.compile(u'¨\s*(<br.*?>)*\s*o', re.UNICODE), lambda match: u'ö'),
|
||||||
|
(re.compile(u'¨\s*(<br.*?>)*\s*O', re.UNICODE), lambda match: u'Ö'),
|
||||||
|
(re.compile(u'¨\s*(<br.*?>)*\s*u', re.UNICODE), lambda match: u'ü'),
|
||||||
|
(re.compile(u'¨\s*(<br.*?>)*\s*U', re.UNICODE), lambda match: u'Ü'),
|
||||||
|
(re.compile(u'¨\s*(<br.*?>)*\s*e', re.UNICODE), lambda match: u'ë'),
|
||||||
|
(re.compile(u'¨\s*(<br.*?>)*\s*E', re.UNICODE), lambda match: u'Ë'),
|
||||||
|
(re.compile(u'¨\s*(<br.*?>)*\s*i', re.UNICODE), lambda match: u'ï'),
|
||||||
|
(re.compile(u'¨\s*(<br.*?>)*\s*I', re.UNICODE), lambda match: u'Ï'),
|
||||||
|
(re.compile(u'¨\s*(<br.*?>)*\s*a', re.UNICODE), lambda match: u'ä'),
|
||||||
|
(re.compile(u'¨\s*(<br.*?>)*\s*A', re.UNICODE), lambda match: u'Ä'),
|
||||||
|
|
||||||
# Remove page links
|
# Remove page links
|
||||||
(re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
|
(re.compile(r'<a name=\d+></a>', re.IGNORECASE), lambda match: ''),
|
||||||
# Remove <hr> tags
|
# Remove <hr> tags
|
||||||
|
87
src/calibre/ebooks/pdb/ereader/inspector.py
Normal file
87
src/calibre/ebooks/pdb/ereader/inspector.py
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
'''
|
||||||
|
Inspect the header of ereader files. This is primarily used for debugging.
|
||||||
|
'''
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2009, John Schember <john@nachtimwald.com>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import struct, sys
|
||||||
|
|
||||||
|
from calibre.ebooks.pdb.header import PdbHeaderReader
|
||||||
|
from calibre.ebooks.pdb.ereader.reader import HeaderRecord
|
||||||
|
|
||||||
|
def pdb_header_info(header):
    '''
    Print a short summary of a PDB header to stdout.

    :param header: Object exposing ``ident``, ``num_sections`` and ``title``
        attributes (``main`` passes a ``PdbHeaderReader``).
    '''
    # Single-argument print(...) is parsed identically by the Python 2 print
    # statement and is also valid Python 3.
    print('PDB Header Info:')
    print('')
    # One-element tuples keep %-formatting correct even if a value is a tuple.
    print('Identity: %s' % (header.ident,))
    print('Total Sections: %s' % (header.num_sections,))
    print('Title: %s' % (header.title,))
    print('')
|
||||||
|
|
||||||
|
def ereader_header_info(header):
    '''
    Print the 16-bit fields of eReader record 0 (the header record) to stdout.

    Bytes 0-54 of the record are decoded as consecutive big-endian unsigned
    shorts. Each field is printed on its own line, labelled with its byte
    range and, where one is known, its name.

    :param header: Object whose ``section_data(0)`` returns the raw bytes of
        record 0 (``main`` passes a ``PdbHeaderReader``).
    '''
    # Names of the fields identified so far, keyed by their start offset.
    # Offsets that are absent are printed with the byte range only.
    known_fields = {
        0: 'Version',
        12: 'Non-Text',
        28: 'footnote_rec',
        30: 'sidebar_rec',
        32: 'bookmark_offset',
        40: 'image_data_offset',
        44: 'metadata_offset',
        48: 'footnote_offset',
        50: 'sidebar_offset',
        52: 'last_data_offset',
    }

    h0 = header.section_data(0)

    print('Ereader Record 0 (Header) Info:')
    print('')

    for start in range(0, 54, 2):
        end = start + 2
        if len(h0) < end:
            # A short slice would make struct.unpack raise; for a debugging
            # tool it is more useful to show what was decoded and say why
            # the listing stops.
            print('Record 0 is truncated: only %i bytes' % len(h0))
            break

        value = struct.unpack('>H', h0[start:end])[0]
        name = known_fields.get(start)
        if name is None:
            print('%i-%i: %i' % (start, end, value))
        else:
            print('%i-%i %s: %i' % (start, end, name, value))

    print('')
|
||||||
|
|
||||||
|
def section_lengths(header):
    '''
    Print the byte length of every section, flagging the oversized ones.

    :param header: Object providing ``section_count()`` and
        ``section_data(index)``.
    '''
    print('Section Sizes')
    print('')

    total = header.section_count()
    for index in range(total):
        nbytes = len(header.section_data(index))
        # NOTE(review): 65505 looks like the largest size a single record
        # may have -- confirm against the format description.
        flag = '' if nbytes <= 65505 else '<--- Over!'
        print('Section %i: %i %s' % (index, nbytes, flag))
|
||||||
|
|
||||||
|
def main(args=sys.argv):
    '''
    Command line entry point: dump header information for an eReader file.

    :param args: Argument list in ``sys.argv`` form; ``args[1]`` is the path
        of the file to inspect.
    :return: 0 on success, 1 when no input file was given.
    '''
    if len(args) < 2:
        print('Error: requires input file.')
        return 1

    # Open the path from the argument list that was just validated; reading
    # sys.argv here would ignore an explicitly passed ``args``.
    f = open(args[1], 'rb')
    try:
        pheader = PdbHeaderReader(f)

        pdb_header_info(pheader)
        ereader_header_info(pheader)
        section_lengths(pheader)
    finally:
        # Release the file handle even if parsing or printing fails.
        f.close()

    return 0
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
sys.exit(main())
|
@ -27,7 +27,7 @@ class Writer(object):
|
|||||||
|
|
||||||
hr = [self._header_record(len(text), len(images))]
|
hr = [self._header_record(len(text), len(images))]
|
||||||
|
|
||||||
sections = hr+text+images+metadata
|
sections = hr+text+images+metadata+['MeTaInFo\x00']
|
||||||
|
|
||||||
lengths = [len(i) for i in sections]
|
lengths = [len(i) for i in sections]
|
||||||
|
|
||||||
@ -82,7 +82,7 @@ class Writer(object):
|
|||||||
if image_items > 0:
|
if image_items > 0:
|
||||||
image_data_offset = text_items + 1
|
image_data_offset = text_items + 1
|
||||||
meta_data_offset = image_data_offset + image_items
|
meta_data_offset = image_data_offset + image_items
|
||||||
last_data_offset = meta_data_offset + 1
|
last_data_offset = meta_data_offset + 2
|
||||||
else:
|
else:
|
||||||
meta_data_offset = text_items + 1
|
meta_data_offset = text_items + 1
|
||||||
last_data_offset = meta_data_offset + 1
|
last_data_offset = meta_data_offset + 1
|
||||||
@ -90,6 +90,35 @@ class Writer(object):
|
|||||||
|
|
||||||
record = u''
|
record = u''
|
||||||
|
|
||||||
|
record += struct.pack('>H', version) # [0:2]
|
||||||
|
record += struct.pack('>H', 0) # [2:4]
|
||||||
|
record += struct.pack('>H', 0) # [4:6]
|
||||||
|
record += struct.pack('>H', 25152) # [6:8] # 25152 is MAGIC
|
||||||
|
record += struct.pack('>H', last_data_offset) # [8:10]
|
||||||
|
record += struct.pack('>H', last_data_offset) # [10:12]
|
||||||
|
record += struct.pack('>H', non_text_offset) # [12:14] # non_text_offset
|
||||||
|
record += struct.pack('>H', non_text_offset) # [14:16]
|
||||||
|
record += struct.pack('>H', 1) # [16:18]
|
||||||
|
record += struct.pack('>H', 1) # [18:20]
|
||||||
|
record += struct.pack('>H', 0) # [20:22]
|
||||||
|
record += struct.pack('>H', 1) # [22:24]
|
||||||
|
record += struct.pack('>H', 1) # [24:26]
|
||||||
|
record += struct.pack('>H', 0) # [26:28]
|
||||||
|
record += struct.pack('>H', 0) # [28:30] # footnote_rec
|
||||||
|
record += struct.pack('>H', 0) # [30:32] # sidebar_rec
|
||||||
|
record += struct.pack('>H', last_data_offset) # [32:34] # bookmark_offset
|
||||||
|
record += struct.pack('>H', 2560) # [34:36] # 2560 is MAGIC
|
||||||
|
record += struct.pack('>H', non_text_offset) # [36:38]
|
||||||
|
record += struct.pack('>H', non_text_offset + 1) # [38:40]
|
||||||
|
record += struct.pack('>H', image_data_offset) # [40:42]
|
||||||
|
record += struct.pack('>H', image_data_offset) # [42:44]
|
||||||
|
record += struct.pack('>H', meta_data_offset) # [44:46]
|
||||||
|
record += struct.pack('>H', meta_data_offset) # [46:48]
|
||||||
|
record += struct.pack('>H', last_data_offset) # [48:50] # footnote_offset
|
||||||
|
record += struct.pack('>H', last_data_offset) # [50:52] # sidebar_offset
|
||||||
|
record += struct.pack('>H', last_data_offset) # [52:54] # last_data_offset
|
||||||
|
|
||||||
|
'''
|
||||||
# Version
|
# Version
|
||||||
record += struct.pack('>H', version)
|
record += struct.pack('>H', version)
|
||||||
record = record.ljust(12, '\x00')
|
record = record.ljust(12, '\x00')
|
||||||
@ -112,6 +141,6 @@ class Writer(object):
|
|||||||
record += struct.pack('>H', last_data_offset)
|
record += struct.pack('>H', last_data_offset)
|
||||||
record = record.ljust(52, '\x00')
|
record = record.ljust(52, '\x00')
|
||||||
record += struct.pack('>H', last_data_offset)
|
record += struct.pack('>H', last_data_offset)
|
||||||
|
'''
|
||||||
return record
|
return record
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user