mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Sync to trunk.
This commit is contained in:
commit
c112973417
@ -3,10 +3,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
|
|||||||
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
||||||
title = u'FHM UK'
|
title = u'FHM UK'
|
||||||
description = 'Good News for Men'
|
description = 'Good News for Men'
|
||||||
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
|
cover_url = 'http://www.greatmagazines.co.uk/covers/large/w197/current/fhm.jpg'
|
||||||
|
# cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
|
||||||
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
|
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
|
||||||
__author__ = 'Dave Asbury'
|
__author__ = 'Dave Asbury'
|
||||||
# last updated 27/1/12
|
# last updated 17/3/12
|
||||||
language = 'en_GB'
|
language = 'en_GB'
|
||||||
oldest_article = 28
|
oldest_article = 28
|
||||||
max_articles_per_feed = 12
|
max_articles_per_feed = 12
|
||||||
@ -29,6 +30,8 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
|
|||||||
feeds = [
|
feeds = [
|
||||||
(u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
|
(u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
|
||||||
(u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
|
(u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
|
||||||
(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
|
(u'Upgrade',u'http://feed43.com/0877305847443234.xml'),
|
||||||
(u'Gaming',u'http://feed43.com/0755006465351035.xml'),
|
#(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
|
||||||
|
#(u'Gaming',u'http://feed43.com/0755006465351035.xml'),
|
||||||
|
(u'Gaming',u'http://feed43.com/6537162612465672.xml'),
|
||||||
]
|
]
|
||||||
|
@ -625,7 +625,8 @@ from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK,
|
|||||||
POCKETBOOK701, POCKETBOOK360P, PI2)
|
POCKETBOOK701, POCKETBOOK360P, PI2)
|
||||||
from calibre.devices.iliad.driver import ILIAD
|
from calibre.devices.iliad.driver import ILIAD
|
||||||
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
|
||||||
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
|
from calibre.devices.jetbook.driver import (JETBOOK, MIBUK, JETBOOK_MINI,
|
||||||
|
JETBOOK_COLOR)
|
||||||
from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
|
from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
|
||||||
KINDLE_FIRE)
|
KINDLE_FIRE)
|
||||||
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
|
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
|
||||||
@ -664,9 +665,7 @@ plugins += [
|
|||||||
ILIAD,
|
ILIAD,
|
||||||
IREXDR1000,
|
IREXDR1000,
|
||||||
IREXDR800,
|
IREXDR800,
|
||||||
JETBOOK,
|
JETBOOK, JETBOOK_MINI, MIBUK, JETBOOK_COLOR,
|
||||||
JETBOOK_MINI,
|
|
||||||
MIBUK,
|
|
||||||
SHINEBOOK,
|
SHINEBOOK,
|
||||||
POCKETBOOK360, POCKETBOOK301, POCKETBOOK602, POCKETBOOK701, POCKETBOOK360P,
|
POCKETBOOK360, POCKETBOOK301, POCKETBOOK602, POCKETBOOK701, POCKETBOOK360P,
|
||||||
PI2,
|
PI2,
|
||||||
|
@ -234,7 +234,7 @@ def main(args=sys.argv):
|
|||||||
sql_dump = args[-1]
|
sql_dump = args[-1]
|
||||||
reinit_db(opts.reinitialize_db, sql_dump=sql_dump)
|
reinit_db(opts.reinitialize_db, sql_dump=sql_dump)
|
||||||
elif opts.inspect_mobi:
|
elif opts.inspect_mobi:
|
||||||
from calibre.ebooks.mobi.debug import inspect_mobi
|
from calibre.ebooks.mobi.debug.main import inspect_mobi
|
||||||
for path in args[1:]:
|
for path in args[1:]:
|
||||||
prints('Inspecting:', path)
|
prints('Inspecting:', path)
|
||||||
inspect_mobi(path)
|
inspect_mobi(path)
|
||||||
|
@ -125,4 +125,29 @@ class JETBOOK_MINI(USBMS):
|
|||||||
|
|
||||||
SUPPORTS_SUB_DIRS = True
|
SUPPORTS_SUB_DIRS = True
|
||||||
|
|
||||||
|
class JETBOOK_COLOR(USBMS):

    # NOTE(review): the string below is the class docstring. Its values match
    # VENDOR_ID, PRODUCT_ID and BCD further down; presumably it is a captured
    # USB device description kept for reference -- confirm.
    '''
    set([(u'0x951',
        u'0x160b',
        u'0x0',
        u'Freescale',
        u'Mass Storage Device',
        u'0802270905553')])
    '''

    # File types listed for this device
    FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'rtf', 'txt', 'pdf', 'djvu']

    # Descriptive metadata; the description is passed through _() so that it
    # can be translated
    gui_name = 'JetBook Color'
    name = 'JetBook Color Device Interface'
    description = _('Communicate with the JetBook Color reader.')
    author = 'Kovid Goyal'

    # USB identifiers (same numbers as in the docstring above)
    VENDOR_ID = [0x951]
    PRODUCT_ID = [0x160b]
    BCD = [0x0]
    # presumably the folder on the device that books are placed in -- the
    # semantics are defined by USBMS, which is not visible here
    EBOOK_DIR_MAIN = 'My Books'

    SUPPORTS_SUB_DIRS = True
|
||||||
|
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ class PRS505(USBMS):
|
|||||||
booklist_class = CollectionsBookList
|
booklist_class = CollectionsBookList
|
||||||
|
|
||||||
|
|
||||||
FORMATS = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt']
|
FORMATS = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt', 'zbf']
|
||||||
CAN_SET_METADATA = ['title', 'authors', 'collections']
|
CAN_SET_METADATA = ['title', 'authors', 'collections']
|
||||||
CAN_DO_DEVICE_DB_PLUGBOARD = True
|
CAN_DO_DEVICE_DB_PLUGBOARD = True
|
||||||
|
|
||||||
|
@ -179,7 +179,7 @@ class MOBIOutput(OutputFormatPlugin):
|
|||||||
writer(oeb, output_path)
|
writer(oeb, output_path)
|
||||||
|
|
||||||
if opts.extract_to is not None:
|
if opts.extract_to is not None:
|
||||||
from calibre.ebooks.mobi.debug import inspect_mobi
|
from calibre.ebooks.mobi.debug.main import inspect_mobi
|
||||||
ddir = opts.extract_to
|
ddir = opts.extract_to
|
||||||
inspect_mobi(output_path, ddir=ddir)
|
inspect_mobi(output_path, ddir=ddir)
|
||||||
|
|
||||||
|
16
src/calibre/ebooks/mobi/debug/__init__.py
Normal file
16
src/calibre/ebooks/mobi/debug/__init__.py
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
|
||||||
|
def format_bytes(byts):
    '''
    Render a byte string as space separated hexadecimal numbers.

    Each byte is written as lowercase hex without a ``0x`` prefix and without
    zero padding, so the bytes 0, 10 and 255 are rendered as ``0 a ff``.
    Empty input gives an empty string.
    '''
    return ' '.join(format(octet, 'x') for octet in bytearray(byts))
|
||||||
|
|
||||||
|
|
535
src/calibre/ebooks/mobi/debug/headers.py
Normal file
535
src/calibre/ebooks/mobi/debug/headers.py
Normal file
@ -0,0 +1,535 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import struct, datetime, os
|
||||||
|
|
||||||
|
from calibre.utils.date import utc_tz
|
||||||
|
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||||
|
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
||||||
|
from calibre.ebooks.mobi.debug import format_bytes
|
||||||
|
from calibre.ebooks.mobi.utils import get_trailing_data
|
||||||
|
|
||||||
|
# PalmDB {{{
|
||||||
|
class PalmDOCAttributes(object):
    '''
    Decoded view of the two byte attributes field of a PalmDB header.

    :param raw: Exactly two bytes (``struct.error`` is raised otherwise).

    After construction ``val`` holds the integer value of the field and
    ``attributes`` holds one :class:`Attr` per known flag, in a fixed order.
    '''

    class Attr(object):
        '''A single named flag; ``val`` is non-zero when the flag is set.'''

        def __init__(self, name, field, val):
            self.name = name
            # Keep only the bit selected by the mask ``field``
            self.val = val & field

        def __str__(self):
            return '%s: %s'%(self.name, bool(self.val))

    def __init__(self, raw):
        # NOTE(review): every other PalmDB header field in this file is
        # unpacked big-endian ('>'); confirm that little-endian is really
        # intended for the attributes field.
        self.val = struct.unpack(b'<H', raw)[0]
        self.attributes = []
        # Each flag is a single bit. The last two masks used to be 0x12 and
        # 0x14, which overlap the 0x10, 0x04 and 0x02 flags and so reported
        # the reset/beaming flags incorrectly.
        for name, field in [('Read Only', 0x02), ('Dirty AppInfoArea', 0x04),
                ('Backup this database', 0x08),
                ('Okay to install newer over existing copy, if present on PalmPilot', 0x10),
                ('Force the PalmPilot to reset after this database is installed', 0x20),
                ('Don\'t allow copy of file to be beamed to other Pilot',
                    0x40)]:
            self.attributes.append(PalmDOCAttributes.Attr(name, field,
                self.val))

    def __str__(self):
        attrs = '\n\t'.join([str(x) for x in self.attributes])
        return 'PalmDOC Attributes: %s\n\t%s'%(bin(self.val), attrs)
|
||||||
|
|
||||||
|
class PalmDB(object):
    '''
    Parsed PalmDB header.

    :param raw: The first 78 bytes of the file (fields are read from offsets
        0 to 78).

    Raises ``ValueError`` if the data starts with ``TPZ`` (a Topaz file) or
    if the type/creator pair is neither ``BOOKMOBI`` nor ``TEXTREAD``.
    '''

    def __init__(self, raw):
        self.raw = raw

        if self.raw.startswith(b'TPZ'):
            raise ValueError('This is a Topaz file')

        data = self.raw
        self.name = data[:32].replace(b'\x00', b'')
        self.attributes = PalmDOCAttributes(data[32:34])
        self.version, = struct.unpack(b'>H', data[34:36])

        # Dates are stored as a number of seconds counted from this epoch
        palm_epoch = datetime.datetime(1904, 1, 1, tzinfo=utc_tz)

        def read_date(start):
            # Returns the stored number of seconds and the matching datetime
            seconds, = struct.unpack(b'>I', data[start:start+4])
            return seconds, palm_epoch + datetime.timedelta(seconds=seconds)

        self.creation_date_raw, self.creation_date = read_date(36)
        self.modification_date_raw, self.modification_date = read_date(40)
        self.last_backup_date_raw, self.last_backup_date = read_date(44)

        self.modification_number, = struct.unpack(b'>I', data[48:52])
        self.app_info_id = data[52:56]
        self.sort_info_id = data[56:60]
        self.type = data[60:64]
        self.creator = data[64:68]
        self.ident = self.type + self.creator
        if self.ident not in (b'BOOKMOBI', b'TEXTREAD'):
            raise ValueError('Unknown book ident: %r'%self.ident)
        self.last_record_uid, = struct.unpack(b'>I', data[68:72])
        self.next_rec_list_id = data[72:76]

        self.number_of_records, = struct.unpack(b'>H', data[76:78])

    def __str__(self):
        # One line per header field, preceded by a banner
        lines = [
            '*'*20 + ' PalmDB Header '+ '*'*20,
            'Name: %r'%self.name,
            str(self.attributes),
            'Version: %s'%self.version,
            'Creation date: %s (%s)'%(self.creation_date.isoformat(),
                self.creation_date_raw),
            'Modification date: %s (%s)'%(self.modification_date.isoformat(),
                self.modification_date_raw),
            'Backup date: %s (%s)'%(self.last_backup_date.isoformat(),
                self.last_backup_date_raw),
            'Modification number: %s'%self.modification_number,
            'App Info ID: %r'%self.app_info_id,
            'Sort Info ID: %r'%self.sort_info_id,
            'Type: %r'%self.type,
            'Creator: %r'%self.creator,
            'Last record UID +1: %r'%self.last_record_uid,
            'Next record list id: %r'%self.next_rec_list_id,
            'Number of records: %s'%self.number_of_records,
        ]
        return '\n'.join(lines)
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
class Record(object): # {{{
    '''
    A single record of the file together with its record header.

    :param raw: The bytes of the record.
    :param header: A sequence of exactly three items: offset, flags and uid.
    '''

    def __init__(self, raw, header):
        offset, flags, uid = header
        self.offset = offset
        self.flags = flags
        self.uid = uid
        self.raw = raw

    @property
    def header(self):
        '''One line, human readable summary of this record.'''
        template = 'Offset: %d Flags: %d UID: %d First 4 bytes: %r Size: %d'
        return template%(self.offset, self.flags, self.uid, self.raw[:4],
                len(self.raw))
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
# EXTH {{{
|
||||||
|
class EXTHRecord(object):
    '''
    One EXTH record: a numeric type plus its payload.

    ``name`` is a descriptive name for known types and ``repr(type)`` for
    unknown ones. For the record types that are listed as integers below,
    ``data`` is replaced by the unpacked big-endian 32 bit value (the payload
    must then be exactly four bytes long, otherwise ``struct.error`` is
    raised); for all other types ``data`` stays the raw payload.
    '''

    def __init__(self, type_, data):
        self.type = type_
        self.data = data

        known_names = {
                1 : 'DRM Server id',
                2 : 'DRM Commerce id',
                3 : 'DRM ebookbase book id',
                100 : 'author',
                101 : 'publisher',
                102 : 'imprint',
                103 : 'description',
                104 : 'isbn',
                105 : 'subject',
                106 : 'publishingdate',
                107 : 'review',
                108 : 'contributor',
                109 : 'rights',
                110 : 'subjectcode',
                111 : 'type',
                112 : 'source',
                113 : 'asin',
                114 : 'versionnumber',
                115 : 'sample',
                116 : 'startreading',
                117 : 'adult',
                118 : 'retailprice',
                119 : 'retailpricecurrency',
                121 : 'KF8 header section index',
                125 : 'KF8 resources (images/fonts) count',
                129 : 'KF8 cover URI',
                131 : 'KF8 unknown count',
                201 : 'coveroffset',
                202 : 'thumboffset',
                203 : 'hasfakecover',
                204 : 'Creator Software',
                205 : 'Creator Major Version', # '>I'
                206 : 'Creator Minor Version', # '>I'
                207 : 'Creator Build Number', # '>I'
                208 : 'watermark',
                209 : 'tamper_proof_keys',
                300 : 'fontsignature',
                301 : 'clippinglimit', # percentage '>B'
                402 : 'publisherlimit',
                404 : 'TTS flag', # '>B' 1 - TTS disabled 0 - TTS enabled
                501 : 'cdetype', # 4 chars (PDOC or EBOK)
                502 : 'lastupdatetime',
                503 : 'updatedtitle',
        }
        self.name = known_names.get(self.type, repr(self.type))

        # Records whose payload is decoded as a single 32 bit integer,
        # selected either by name or by numeric type
        integer_names = {'coveroffset', 'thumboffset', 'hasfakecover',
                'Creator Major Version', 'Creator Minor Version',
                'Creator Build Number', 'Creator Software', 'startreading'}
        integer_types = {121, 125, 131}
        if self.name in integer_names or self.type in integer_types:
            self.data, = struct.unpack(b'>I', self.data)

    def __str__(self):
        return '%s (%d): %r'%(self.name, self.type, self.data)
|
||||||
|
|
||||||
|
class EXTHHeader(object):
    '''
    Parsed EXTH block.

    :param raw: Bytes that start with the ``EXTH`` signature; ``ValueError``
        is raised otherwise.

    ``records`` holds the parsed records sorted by type and ``rmap`` maps a
    record type to its record (if a type occurs more than once, the last
    record of that type in ``records`` is the one kept in ``rmap``).
    '''

    def __init__(self, raw):
        self.raw = raw
        if not self.raw.startswith(b'EXTH'):
            raise ValueError('EXTH header does not start with EXTH')
        self.length, = struct.unpack(b'>I', self.raw[4:8])
        self.count, = struct.unpack(b'>I', self.raw[8:12])

        self.records = []
        # The first record starts right after the 12 byte preamble
        cursor = 12
        for _ in xrange(self.count):
            cursor = self.read_record(cursor)
        self.records.sort(key=lambda rec: rec.type)
        self.rmap = {rec.type:rec for rec in self.records}

    def __getitem__(self, type_):
        '''Return the data of the record of type ``type_`` (``KeyError`` if absent).'''
        return self.rmap[type_].data

    def get(self, type_, default=None):
        '''Return the data of the record of type ``type_`` or ``default``.'''
        return getattr(self.rmap.get(type_, default), 'data', default)

    def read_record(self, pos):
        '''
        Parse the record starting at ``pos``, append it to ``records`` and
        return the position at which the next record starts.
        '''
        type_, length = struct.unpack(b'>II', self.raw[pos:pos+8])
        # The stored length includes the 8 byte type/length prefix
        end = pos + length
        self.records.append(EXTHRecord(type_, self.raw[(pos+8):end]))
        return end

    @property
    def kf8_header_index(self):
        '''Data of the record of type 121, or None if there is no such record.'''
        return self.get(121, None)

    def __str__(self):
        lines = [
            '*'*20 + ' EXTH Header '+ '*'*20,
            'EXTH header length: %d'%self.length,
            'Number of EXTH records: %d'%self.count,
            'EXTH records...',
        ]
        lines.extend(str(rec) for rec in self.records)
        return '\n'.join(lines)
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
class MOBIHeader(object): # {{{
    '''
    Parsed PalmDOC + MOBI header, read from the raw bytes of a header record.

    :param record0: Object whose ``raw`` attribute holds the bytes of the
        header record.
    :param offset: Number added to the record indices stored in the header
        so that they become absolute (see the comment in ``__init__``).

    Raises ``ValueError`` if the ``MOBI`` identifier is missing, or if the
    file version is 8 or more and the Meta orth index differs from the
    sections index. If the EXTH flag is set, the EXTH block is parsed into
    ``exth``.
    '''

    def __init__(self, record0, offset):
        self.raw = record0.raw
        self.header_offset = offset

        # First 16 bytes: compression, text length, record info, encryption
        self.compression_raw = self.raw[:2]
        self.compression = {1: 'No compression', 2: 'PalmDoc compression',
                17480: 'HUFF/CDIC compression'}.get(struct.unpack(b'>H',
                    self.compression_raw)[0],
                    repr(self.compression_raw))
        self.unused = self.raw[2:4]
        self.text_length, = struct.unpack(b'>I', self.raw[4:8])
        self.number_of_text_records, self.text_record_size = \
                struct.unpack(b'>HH', self.raw[8:12])
        self.encryption_type_raw, = struct.unpack(b'>H', self.raw[12:14])
        self.encryption_type = {
                0: 'No encryption',
                1: 'Old mobipocket encryption',
                2: 'Mobipocket encryption'
            }.get(self.encryption_type_raw, repr(self.encryption_type_raw))
        self.unknown = self.raw[14:16]

        # The MOBI header proper starts at offset 16
        self.identifier = self.raw[16:20]
        if self.identifier != b'MOBI':
            raise ValueError('Identifier %r unknown'%self.identifier)

        self.length, = struct.unpack(b'>I', self.raw[20:24])
        self.type_raw, = struct.unpack(b'>I', self.raw[24:28])
        self.type = {
                2 : 'Mobipocket book',
                3 : 'PalmDOC book',
                4 : 'Audio',
                257 : 'News',
                258 : 'News Feed',
                259 : 'News magazine',
                513 : 'PICS',
                514 : 'Word',
                515 : 'XLS',
                516 : 'PPT',
                517 : 'TEXT',
                518 : 'HTML',
            }.get(self.type_raw, repr(self.type_raw))

        self.encoding_raw, = struct.unpack(b'>I', self.raw[28:32])
        self.encoding = {
                1252 : 'cp1252',
                65001: 'utf-8',
            }.get(self.encoding_raw, repr(self.encoding_raw))
        self.uid = self.raw[32:36]
        self.file_version, = struct.unpack(b'>I', self.raw[36:40])
        self.meta_orth_indx, self.meta_infl_indx = struct.unpack(
                b'>II', self.raw[40:48])
        self.secondary_index_record, = struct.unpack(b'>I', self.raw[48:52])
        self.reserved = self.raw[52:80]
        self.first_non_book_record, = struct.unpack(b'>I', self.raw[80:84])
        self.fullname_offset, = struct.unpack(b'>I', self.raw[84:88])
        self.fullname_length, = struct.unpack(b'>I', self.raw[88:92])
        self.locale_raw, = struct.unpack(b'>I', self.raw[92:96])
        langcode = self.locale_raw
        langid = langcode & 0xFF
        sublangid = (langcode >> 10) & 0xFF
        self.language = main_language.get(langid, 'ENGLISH')
        self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')

        self.input_language = self.raw[96:100]
        # The attribute name is misspelled; it is kept as is because it is
        # part of the interface of this class
        self.output_langauage = self.raw[100:104]
        self.min_version, = struct.unpack(b'>I', self.raw[104:108])
        self.first_image_index, = struct.unpack(b'>I', self.raw[108:112])
        self.huffman_record_offset, = struct.unpack(b'>I', self.raw[112:116])
        self.huffman_record_count, = struct.unpack(b'>I', self.raw[116:120])
        self.datp_record_offset, = struct.unpack(b'>I', self.raw[120:124])
        self.datp_record_count, = struct.unpack(b'>I', self.raw[124:128])
        self.exth_flags, = struct.unpack(b'>I', self.raw[128:132])
        self.has_exth = bool(self.exth_flags & 0x40)

        # The DRM fields are only read when the header is long enough
        self.has_drm_data = self.length >= 174 and len(self.raw) >= 180
        if self.has_drm_data:
            self.unknown3 = self.raw[132:164]
            self.drm_offset, = struct.unpack(b'>I', self.raw[164:168])
            self.drm_count, = struct.unpack(b'>I', self.raw[168:172])
            self.drm_size, = struct.unpack(b'>I', self.raw[172:176])
            self.drm_flags = bin(struct.unpack(b'>I', self.raw[176:180])[0])

        # Same for the extra data flags and the fields around them
        self.has_extra_data_flags = self.length >= 232 and len(self.raw) >= 232+16
        self.has_fcis_flis = False
        self.has_multibytes = self.has_indexing_bytes = self.has_uncrossable_breaks = False
        self.extra_data_flags = 0
        if self.has_extra_data_flags:
            self.unknown4 = self.raw[180:192]
            self.fdst_idx, self.fdst_count = struct.unpack_from(b'>II',
                    self.raw, 192)
            (self.fcis_number, self.fcis_count, self.flis_number,
                    self.flis_count) = struct.unpack(b'>IIII',
                            self.raw[200:216])
            self.unknown6 = self.raw[216:224]
            self.srcs_record_index = struct.unpack(b'>I',
                self.raw[224:228])[0]
            self.num_srcs_records = struct.unpack(b'>I',
                self.raw[228:232])[0]
            self.unknown7 = self.raw[232:240]
            self.extra_data_flags = struct.unpack(b'>I',
                self.raw[240:244])[0]
            self.has_multibytes = bool(self.extra_data_flags & 0b1)
            self.has_indexing_bytes = bool(self.extra_data_flags & 0b10)
            self.has_uncrossable_breaks = bool(self.extra_data_flags & 0b100)
            self.primary_index_record, = struct.unpack(b'>I',
                    self.raw[244:248])

        if self.file_version >= 8:
            (self.sect_idx, self.skel_idx, self.datp_idx, self.oth_idx
                    ) = struct.unpack_from(b'>4L', self.raw, 248)
            # The MOBI header starts at offset 16, so it ends at
            # 16 + self.length (the same position the EXTH block is read
            # from below). Slicing only up to self.length dropped the last
            # 16 bytes of the header.
            self.unknown9 = self.raw[264:self.length+16]
            if self.meta_orth_indx != self.sect_idx:
                raise ValueError('KF8 header has different Meta orth and '
                        'section indices')

        # The following are all relative to the position of the header record
        # make them absolute for ease of debugging
        for x in ('sect_idx', 'skel_idx', 'datp_idx', 'oth_idx',
                'meta_orth_indx', 'huffman_record_offset',
                'first_non_book_record', 'datp_record_offset', 'fcis_number',
                'flis_number', 'primary_index_record', 'fdst_idx',
                'first_image_index'):
            if hasattr(self, x):
                val = getattr(self, x)
                # NULL_INDEX is a sentinel, not a position: shifting it would
                # make it unrecognizable as the null value that __str__
                # reports next to these fields
                if val != NULL_INDEX:
                    setattr(self, x, self.header_offset+val)

        if self.has_exth:
            self.exth_offset = 16 + self.length

            self.exth = EXTHHeader(self.raw[self.exth_offset:])

            self.end_of_exth = self.exth_offset + self.exth.length
            self.bytes_after_exth = self.raw[self.end_of_exth:self.fullname_offset]

    def __str__(self):
        ans = ['*'*20 + ' MOBI %d Header '%self.file_version+ '*'*20]
        a = ans.append
        # Helper for index fields: shows the null value next to the value
        i = lambda d, x : a('%s (null value: %d): %d'%(d, NULL_INDEX, x))
        ans.append('Compression: %s'%self.compression)
        ans.append('Unused: %r'%self.unused)
        ans.append('Number of text records: %d'%self.number_of_text_records)
        ans.append('Text record size: %d'%self.text_record_size)
        ans.append('Encryption: %s'%self.encryption_type)
        ans.append('Unknown: %r'%self.unknown)
        ans.append('Identifier: %r'%self.identifier)
        ans.append('Header length: %d'% self.length)
        ans.append('Type: %s'%self.type)
        ans.append('Encoding: %s'%self.encoding)
        ans.append('UID: %r'%self.uid)
        ans.append('File version: %d'%self.file_version)
        i('Meta Orth Index (Sections index in KF8)', self.meta_orth_indx)
        i('Meta Infl Index', self.meta_infl_indx)
        ans.append('Secondary index record: %d (null val: %d)'%(
            self.secondary_index_record, NULL_INDEX))
        ans.append('Reserved: %r'%self.reserved)
        ans.append('First non-book record (null value: %d): %d'%(NULL_INDEX,
            self.first_non_book_record))
        ans.append('Full name offset: %d'%self.fullname_offset)
        ans.append('Full name length: %d bytes'%self.fullname_length)
        ans.append('Langcode: %r'%self.locale_raw)
        ans.append('Language: %s'%self.language)
        ans.append('Sub language: %s'%self.sublanguage)
        ans.append('Input language: %r'%self.input_language)
        ans.append('Output language: %r'%self.output_langauage)
        ans.append('Min version: %d'%self.min_version)
        ans.append('First Image index: %d'%self.first_image_index)
        ans.append('Huffman record offset: %d'%self.huffman_record_offset)
        ans.append('Huffman record count: %d'%self.huffman_record_count)
        ans.append('DATP record offset: %r'%self.datp_record_offset)
        ans.append('DATP record count: %r'%self.datp_record_count)
        ans.append('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
        if self.has_drm_data:
            ans.append('Unknown3: %r'%self.unknown3)
            ans.append('DRM Offset: %s'%self.drm_offset)
            ans.append('DRM Count: %s'%self.drm_count)
            ans.append('DRM Size: %s'%self.drm_size)
            ans.append('DRM Flags: %r'%self.drm_flags)
        if self.has_extra_data_flags:
            ans.append('Unknown4: %r'%self.unknown4)
            ans.append('FDST Index: %d'% self.fdst_idx)
            ans.append('FDST Count: %d'% self.fdst_count)
            ans.append('FCIS number: %d'% self.fcis_number)
            ans.append('FCIS count: %d'% self.fcis_count)
            ans.append('FLIS number: %d'% self.flis_number)
            ans.append('FLIS count: %d'% self.flis_count)
            ans.append('Unknown6: %r'% self.unknown6)
            ans.append('SRCS record index: %d'%self.srcs_record_index)
            ans.append('Number of SRCS records?: %d'%self.num_srcs_records)
            ans.append('Unknown7: %r'%self.unknown7)
            ans.append(('Extra data flags: %s (has multibyte: %s) '
                '(has indexing: %s) (has uncrossable breaks: %s)')%(
                    bin(self.extra_data_flags), self.has_multibytes,
                    self.has_indexing_bytes, self.has_uncrossable_breaks ))
            ans.append('Primary index record (null value: %d): %d'%(NULL_INDEX,
                self.primary_index_record))
        if self.file_version >= 8:
            i('Sections Index', self.sect_idx)
            i('SKEL Index', self.skel_idx)
            i('DATP Index', self.datp_idx)
            i('Other Index', self.oth_idx)
            if self.unknown9:
                a('Unknown9: %r'%self.unknown9)

        ans = '\n'.join(ans)

        if self.has_exth:
            ans += '\n\n' + str(self.exth)
            # bytes_after_exth only exists when the EXTH block was parsed
            ans += '\n\nBytes after EXTH (%d bytes): %s'%(
                    len(self.bytes_after_exth),
                    format_bytes(self.bytes_after_exth))

        ans += '\nNumber of bytes after full name: %d' % (len(self.raw) - (self.fullname_offset +
                self.fullname_length))

        ans += '\nRecord 0 length: %d'%len(self.raw)
        return ans
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
class MOBIFile(object):
    '''
    In-memory view of a complete MOBI file.

    :param stream: Object with a ``read()`` method; it is read completely.

    Builds the PalmDB header, the list of records, the MOBI header of
    record 0 (``mobi_header``) and, when record 0 points at a KF8 header,
    the MOBI header of that record (``mobi8_header``; otherwise it is the
    same object as ``mobi_header``). ``kf8_type`` is ``None``,
    ``'standalone'`` or ``'joint'``. ``decompress6`` and ``decompress8`` are
    the callables selected from the compression named in record 0.
    '''

    def __init__(self, stream):
        self.raw = stream.read()
        self.palmdb = PalmDB(self.raw[:78])

        self.record_headers = []
        self.records = []
        # The record list follows the 78 byte header, 8 bytes per entry:
        # a 4 byte offset, 1 byte of flags and a 3 byte value
        for num in xrange(self.palmdb.number_of_records):
            start = 78 + num * 8
            offset, flags, hi, mid, lo = struct.unpack(b'>LBBBB',
                    self.raw[start:start+8])
            self.record_headers.append((offset, flags, hi << 16 | mid << 8 | lo))

        # Every record runs up to the start of the next one, the last record
        # up to the end of the file
        starts = [hdr[0] for hdr in self.record_headers]
        ends = starts[1:] + [len(self.raw)]
        for begin, end, hdr in zip(starts, ends, self.record_headers):
            self.records.append(Record(self.raw[begin:end], hdr))

        self.mobi_header = MOBIHeader(self.records[0], 0)
        self.huffman_record_nums = []

        self.kf8_type = None
        mh = mh8 = self.mobi_header
        if mh.file_version >= 8:
            self.kf8_type = 'standalone'
        elif mh.has_exth and mh.exth.kf8_header_index is not None:
            self.kf8_type = 'joint'
            kf8i = mh.exth.kf8_header_index
            mh8 = MOBIHeader(self.records[kf8i], kf8i)
        self.mobi8_header = mh8

        compression = self.mobi_header.compression.lower()
        if 'huff' in compression:
            from calibre.ebooks.mobi.huffcdic import HuffReader

            def huffit(off, cnt):
                # Returns the numbers of the huffman records and the
                # decompressor built from them
                nums = list(xrange(off, off+cnt))
                reader = HuffReader([self.records[r].raw for r in nums])
                return nums, reader.unpack

            if self.kf8_type == 'joint':
                recs6, d6 = huffit(mh.huffman_record_offset,
                        mh.huffman_record_count)
                recs8, d8 = huffit(mh8.huffman_record_offset,
                        mh8.huffman_record_count)
                self.huffman_record_nums = recs6 + recs8
            else:
                self.huffman_record_nums, d6 = huffit(mh.huffman_record_offset,
                        mh.huffman_record_count)
                d8 = d6
        elif 'palmdoc' in compression:
            from calibre.ebooks.compression.palmdoc import decompress_doc
            d8 = d6 = decompress_doc
        else:
            # No known compression: hand the data back unchanged
            d8 = d6 = lambda x: x

        self.decompress6, self.decompress8 = d6, d8
|
||||||
|
|
||||||
|
class TextRecord(object): # {{{
    '''
    A text record split into its decompressed text and its trailing data.

    :param idx: Number of the record, used in messages and file names.
    :param record: Object whose ``raw`` attribute holds the record bytes.
    :param extra_data_flags: Passed through to ``get_trailing_data``.
    :param decompress: Callable applied to the text part of the record.

    ``trailing_data`` is the mapping returned by ``get_trailing_data`` with
    the keys 0, 1 and 2 renamed to descriptive names and with the
    undecoded trailing bytes added under ``'raw_bytes'``.
    '''

    def __init__(self, idx, record, extra_data_flags, decompress):
        self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags)
        # Whatever follows the text part in the record
        tail = record.raw[len(self.raw):]
        self.raw = decompress(self.raw)

        trailing = self.trailing_data
        for flag, label in ((0, 'multibyte_overlap'), (1, 'indexing'),
                (2, 'uncrossable_breaks')):
            if flag in trailing:
                trailing[label] = trailing.pop(flag)
        trailing['raw_bytes'] = tail

        # Any key that is still a number was not recognized above
        for typ, val in trailing.iteritems():
            if isinstance(typ, int):
                print ('Record %d has unknown trailing data of type: %d : %r'%
                        (idx, typ, val))

        self.idx = idx

    def dump(self, folder):
        '''
        Write the text to ``<idx>.txt`` and the trailing data to
        ``<idx>.trailing_data`` inside ``folder``, with the record number
        zero padded to six digits.
        '''
        base = os.path.join(folder, '%06d'%self.idx)
        with open(base+'.txt', 'wb') as out:
            out.write(self.raw)
        with open(base+'.trailing_data', 'wb') as out:
            for key, value in self.trailing_data.iteritems():
                entry = '%s : %r\n\n'%(key, value)
                out.write(entry.encode('utf-8'))
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
|
48
src/calibre/ebooks/mobi/debug/main.py
Normal file
48
src/calibre/ebooks/mobi/debug/main.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import sys, os, shutil
|
||||||
|
|
||||||
|
from calibre.ebooks.mobi.debug.headers import MOBIFile
|
||||||
|
from calibre.ebooks.mobi.debug.mobi6 import inspect_mobi as inspect_mobi6
|
||||||
|
from calibre.ebooks.mobi.debug.mobi8 import inspect_mobi as inspect_mobi8
|
||||||
|
|
||||||
|
def inspect_mobi(path_or_stream, ddir=None): # {{{
    '''
    Dump debugging information about a MOBI file into a directory.

    :param path_or_stream: Path to the file, or an object with a ``read()``
        method. When ``ddir`` is None the object must also have a ``name``
        attribute.
    :param ddir: Output directory. Defaults to ``decompiled_`` followed by
        the base name of the input file without extension, relative to the
        current directory. An existing directory of that name is removed
        first.
    '''
    opened_here = not hasattr(path_or_stream, 'read')
    stream = open(path_or_stream, 'rb') if opened_here else path_or_stream
    try:
        # MOBIFile reads the whole stream in its constructor, so a file that
        # was opened here can be closed right afterwards instead of leaking
        f = MOBIFile(stream)
    finally:
        if opened_here:
            stream.close()
    if ddir is None:
        ddir = 'decompiled_' + os.path.splitext(os.path.basename(stream.name))[0]
    # Best effort removal of old output; unlike a bare ``except`` this does
    # not swallow KeyboardInterrupt or SystemExit
    shutil.rmtree(ddir, ignore_errors=True)
    os.makedirs(ddir)
    if f.kf8_type is None:
        inspect_mobi6(f, ddir)
    elif f.kf8_type == 'joint':
        # Joint files contain both formats, each gets its own sub-directory
        p6 = os.path.join(ddir, 'mobi6')
        os.mkdir(p6)
        inspect_mobi6(f, p6)
        p8 = os.path.join(ddir, 'mobi8')
        os.mkdir(p8)
        inspect_mobi8(f, p8)
    else:
        inspect_mobi8(f, ddir)

    print ('Debug data saved to:', ddir)
|
||||||
|
|
||||||
|
# }}}
|
||||||
|
|
||||||
|
def main():
    '''
    Command line entry point: inspect the MOBI file named by the first
    command line argument.

    Exits with a usage message when no file argument was supplied.
    '''
    if len(sys.argv) < 2:
        # Without this guard a missing argument surfaces as a bare IndexError
        prog = sys.argv[0] if sys.argv else 'main.py'
        raise SystemExit('Usage: %s /path/to/file.mobi' % prog)
    inspect_mobi(sys.argv[1])

if __name__ == '__main__':
    main()
|
||||||
|
|
@ -7,403 +7,20 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import struct, datetime, sys, os, shutil
|
import struct, sys, os
|
||||||
from collections import OrderedDict, defaultdict
|
from collections import OrderedDict, defaultdict
|
||||||
|
|
||||||
from lxml import html
|
from lxml import html
|
||||||
|
|
||||||
from calibre.utils.date import utc_tz
|
|
||||||
from calibre.ebooks.mobi.langcodes import main_language, sub_language
|
|
||||||
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
|
||||||
from calibre.ebooks.mobi.reader.index import (parse_index_record,
|
from calibre.ebooks.mobi.reader.index import (parse_index_record,
|
||||||
parse_tagx_section)
|
parse_tagx_section)
|
||||||
from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
|
from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
|
||||||
get_trailing_data, decode_tbs, read_font_record)
|
decode_tbs, read_font_record)
|
||||||
from calibre.utils.magick.draw import identify_data
|
from calibre.utils.magick.draw import identify_data
|
||||||
|
from calibre.ebooks.mobi.debug import format_bytes
|
||||||
|
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||||
|
|
||||||
def format_bytes(byts):
    '''
    Render a byte string as space separated hexadecimal numbers, without
    the ``0x`` prefix and without zero padding, for use in debug output.
    '''
    return ' '.join(hex(octet)[2:] for octet in bytearray(byts))
|
|
||||||
|
|
||||||
# PalmDB {{{
|
|
||||||
class PalmDOCAttributes(object):
    '''
    Decode the two byte attributes field of a PalmDB header into a list of
    named flags (``self.attributes``). The raw integer is kept in
    ``self.val``.
    '''

    class Attr(object):
        '''A single named flag; ``val`` is non-zero when the flag is set.'''

        def __init__(self, name, field, val):
            self.name = name
            self.val = val & field

        def __str__(self):
            return '%s: %s'%(self.name, bool(self.val))

    # (name, bit mask) pairs. Every mask must be a single bit, otherwise a
    # flag would be reported as set whenever any of the bits it overlaps
    # with is set (the previous 0x12 and 0x14 masks overlapped with the
    # 0x02, 0x04 and 0x10 flags).
    FLAGS = (
        ('Read Only', 0x02),
        ('Dirty AppInfoArea', 0x04),
        ('Backup this database', 0x08),
        ('Okay to install newer over existing copy, if present on PalmPilot', 0x10),
        ('Force the PalmPilot to reset after this database is installed', 0x20),
        ('Don\'t allow copy of file to be beamed to other Pilot', 0x40),
    )

    def __init__(self, raw):
        '''
        :param raw: The two bytes of the attributes field.
        '''
        # NOTE(review): this field is unpacked little-endian while the other
        # PalmDB header fields in this file are unpacked big-endian --
        # confirm against the format specification.
        self.val = struct.unpack(b'<H', raw)[0]
        self.attributes = [PalmDOCAttributes.Attr(name, field, self.val)
                for name, field in self.FLAGS]

    def __str__(self):
        attrs = '\n\t'.join([str(x) for x in self.attributes])
        return 'PalmDOC Attributes: %s\n\t%s'%(bin(self.val), attrs)
|
|
||||||
|
|
||||||
class PalmDB(object):
    '''
    Parser for the PalmDB header found at the start of a MOBI file.

    Reads the fields located in the first 78 bytes of ``raw``. Raises
    ValueError for Topaz files and for files whose type/creator ident is
    neither ``BOOKMOBI`` nor ``TEXTREAD``.
    '''

    def __init__(self, raw):
        self.raw = raw
        data = self.raw

        if data.startswith(b'TPZ'):
            raise ValueError('This is a Topaz file')

        def u32(start):
            # Big-endian unsigned 32 bit integer at the given offset
            return struct.unpack(b'>I', data[start:start+4])[0]

        self.name = data[:32].replace(b'\x00', b'')
        self.attributes = PalmDOCAttributes(data[32:34])
        self.version = struct.unpack(b'>H', data[34:36])[0]

        # Dates are interpreted as seconds elapsed since this epoch
        palm_epoch = datetime.datetime(1904, 1, 1, tzinfo=utc_tz)

        def as_date(seconds):
            return palm_epoch + datetime.timedelta(seconds=seconds)

        self.creation_date_raw = u32(36)
        self.creation_date = as_date(self.creation_date_raw)
        self.modification_date_raw = u32(40)
        self.modification_date = as_date(self.modification_date_raw)
        self.last_backup_date_raw = u32(44)
        self.last_backup_date = as_date(self.last_backup_date_raw)
        self.modification_number = u32(48)
        self.app_info_id = data[52:56]
        self.sort_info_id = data[56:60]
        self.type = data[60:64]
        self.creator = data[64:68]
        self.ident = self.type + self.creator
        if self.ident not in (b'BOOKMOBI', b'TEXTREAD'):
            raise ValueError('Unknown book ident: %r'%self.ident)
        self.last_record_uid = u32(68)
        self.next_rec_list_id = data[72:76]

        self.number_of_records = struct.unpack(b'>H', data[76:78])[0]

    def __str__(self):
        '''Return a multi-line, human readable dump of the header fields.'''
        lines = (
            '*'*20 + ' PalmDB Header '+ '*'*20,
            'Name: %r'%self.name,
            str(self.attributes),
            'Version: %s'%self.version,
            'Creation date: %s (%s)'%(self.creation_date.isoformat(),
                self.creation_date_raw),
            'Modification date: %s (%s)'%(self.modification_date.isoformat(),
                self.modification_date_raw),
            'Backup date: %s (%s)'%(self.last_backup_date.isoformat(),
                self.last_backup_date_raw),
            'Modification number: %s'%self.modification_number,
            'App Info ID: %r'%self.app_info_id,
            'Sort Info ID: %r'%self.sort_info_id,
            'Type: %r'%self.type,
            'Creator: %r'%self.creator,
            'Last record UID +1: %r'%self.last_record_uid,
            'Next record list id: %r'%self.next_rec_list_id,
            'Number of records: %s'%self.number_of_records,
        )
        return '\n'.join(lines)
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
class Record(object): # {{{
    '''
    A single record of the file: its raw bytes together with the
    ``(offset, flags, uid)`` triple taken from the record header.
    '''

    def __init__(self, raw, header):
        self.offset, self.flags, self.uid = header
        self.raw = raw

    @property
    def header(self):
        '''One line, human readable summary of this record.'''
        template = 'Offset: %d Flags: %d UID: %d First 4 bytes: %r Size: %d'
        values = (self.offset, self.flags, self.uid, self.raw[:4],
                len(self.raw))
        return template % values
# }}}
|
|
||||||
|
|
||||||
# EXTH {{{
|
|
||||||
class EXTHRecord(object):
    '''
    A single EXTH metadata record.

    ``type`` is the numeric record type, ``name`` a human readable name for
    it (``repr(type)`` for unknown types) and ``data`` the payload. For
    record types known to hold an integer, ``data`` is decoded to an int
    when the payload is 1, 2 or 4 bytes long; otherwise it is left as the
    raw bytes.
    '''

    # Human readable names for the known EXTH record types
    NAMES = {
            1 : 'DRM Server id',
            2 : 'DRM Commerce id',
            3 : 'DRM ebookbase book id',
            100 : 'author',
            101 : 'publisher',
            102 : 'imprint',
            103 : 'description',
            104 : 'isbn',
            105 : 'subject',
            106 : 'publishingdate',
            107 : 'review',
            108 : 'contributor',
            109 : 'rights',
            110 : 'subjectcode',
            111 : 'type',
            112 : 'source',
            113 : 'asin',
            114 : 'versionnumber',
            115 : 'sample',
            116 : 'startreading',
            117 : 'adult',
            118 : 'retailprice',
            119 : 'retailpricecurrency',
            121 : 'KF8 header section index',
            125 : 'KF8 resources (images/fonts) count',
            129 : 'KF8 cover URI',
            131 : 'KF8 unknown count',
            201 : 'coveroffset',
            202 : 'thumboffset',
            203 : 'hasfakecover',
            204 : 'Creator Software',
            205 : 'Creator Major Version', # '>I'
            206 : 'Creator Minor Version', # '>I'
            207 : 'Creator Build Number', # '>I'
            208 : 'watermark',
            209 : 'tamper_proof_keys',
            300 : 'fontsignature',
            301 : 'clippinglimit', # percentage '>B'
            402 : 'publisherlimit',
            404 : 'TTS flag', # '>B' 1 - TTS disabled 0 - TTS enabled
            501 : 'cdetype', # 4 chars (PDOC or EBOK)
            502 : 'lastupdatetime',
            503 : 'updatedtitle',
    }

    # Records whose payload is a big-endian unsigned integer, identified
    # either by name or by numeric type
    INT_NAMES = frozenset(['coveroffset', 'thumboffset', 'hasfakecover',
        'Creator Major Version', 'Creator Minor Version',
        'Creator Build Number', 'Creator Software', 'startreading'])
    INT_TYPES = frozenset([121, 125, 131])

    # struct format to use for each supported integer payload size
    INT_FORMATS = {1: b'>B', 2: b'>H', 4: b'>I'}

    def __init__(self, type_, data):
        self.type = type_
        self.data = data
        self.name = self.NAMES.get(self.type, repr(self.type))

        if self.name in self.INT_NAMES or self.type in self.INT_TYPES:
            # Unconditionally unpacking as '>I' raises struct.error for any
            # payload that is not exactly four bytes long, which would
            # abort the whole dump; pick the format from the payload size
            # and keep the raw bytes when no integer format fits.
            fmt = self.INT_FORMATS.get(len(self.data))
            if fmt is not None:
                self.data, = struct.unpack(fmt, self.data)

    def __str__(self):
        return '%s (%d): %r'%(self.name, self.type, self.data)
|
|
||||||
|
|
||||||
class EXTHHeader(object):
    '''
    Parser for an EXTH header block.

    ``raw`` must start with the ``EXTH`` signature, otherwise ValueError is
    raised. The declared header length and record count are read from the
    eight bytes after the signature, then ``count`` records are read
    starting at offset 12 and stored, sorted by type, in ``records``.
    '''

    def __init__(self, raw):
        self.raw = raw
        if not self.raw.startswith(b'EXTH'):
            raise ValueError('EXTH header does not start with EXTH')
        self.length = struct.unpack(b'>I', self.raw[4:8])[0]
        self.count = struct.unpack(b'>I', self.raw[8:12])[0]

        self.records = []
        cursor, remaining = 12, self.count
        while remaining > 0:
            cursor = self.read_record(cursor)
            remaining -= 1
        self.records.sort(key=lambda rec: rec.type)

    def read_record(self, pos):
        '''
        Read the record starting at ``pos`` and append it to ``records``.

        The record's length field counts its eight byte type/length prefix
        as well as the payload. Returns the offset of the next record.
        '''
        type_, length = struct.unpack(b'>II', self.raw[pos:pos+8])
        end = pos + length
        self.records.append(EXTHRecord(type_, self.raw[(pos+8):end]))
        return end

    def __str__(self):
        lines = [
            '*'*20 + ' EXTH Header '+ '*'*20,
            'EXTH header length: %d'%self.length,
            'Number of EXTH records: %d'%self.count,
            'EXTH records...',
        ]
        lines.extend(str(rec) for rec in self.records)
        return '\n'.join(lines)
|
|
||||||
# }}}
|
|
||||||
|
|
||||||
class MOBIHeader(object): # {{{
    '''
    Parser for record 0 of a MOBI file.

    Decodes the leading 16 byte block (compression, text length, record
    counts, encryption), the MOBI header that follows it and, when the EXTH
    flag is set, the EXTH header. Groups of fields that only exist in longer
    headers are guarded by ``has_drm_data`` and ``has_extra_data_flags``.

    Raises ValueError when the MOBI identifier is missing.
    '''

    def __init__(self, record0):
        '''
        :param record0: Object whose ``raw`` attribute holds the bytes of
            record 0.
        '''
        self.raw = record0.raw

        self.compression_raw = self.raw[:2]
        self.compression = {1: 'No compression', 2: 'PalmDoc compression',
                17480: 'HUFF/CDIC compression'}.get(struct.unpack(b'>H',
                    self.compression_raw)[0],
                    repr(self.compression_raw))
        self.unused = self.raw[2:4]
        self.text_length, = struct.unpack(b'>I', self.raw[4:8])
        self.number_of_text_records, self.text_record_size = \
                struct.unpack(b'>HH', self.raw[8:12])
        self.encryption_type_raw, = struct.unpack(b'>H', self.raw[12:14])
        self.encryption_type = {
                0: 'No encryption',
                1: 'Old mobipocket encryption',
                2: 'Mobipocket encryption'
            }.get(self.encryption_type_raw, repr(self.encryption_type_raw))
        self.unknown = self.raw[14:16]

        self.identifier = self.raw[16:20]
        if self.identifier != b'MOBI':
            raise ValueError('Identifier %r unknown'%self.identifier)

        self.length, = struct.unpack(b'>I', self.raw[20:24])
        self.type_raw, = struct.unpack(b'>I', self.raw[24:28])
        self.type = {
                2 : 'Mobipocket book',
                3 : 'PalmDOC book',
                4 : 'Audio',
                257 : 'News',
                258 : 'News Feed',
                259 : 'News magazine',
                513 : 'PICS',
                514 : 'Word',
                515 : 'XLS',
                516 : 'PPT',
                517 : 'TEXT',
                518 : 'HTML',
            }.get(self.type_raw, repr(self.type_raw))

        self.encoding_raw, = struct.unpack(b'>I', self.raw[28:32])
        self.encoding = {
                1252 : 'cp1252',
                65001: 'utf-8',
            }.get(self.encoding_raw, repr(self.encoding_raw))
        self.uid = self.raw[32:36]
        # struct.unpack always returns a tuple; the trailing comma unpacks
        # it so that file_version is an int and can be compared with ints
        self.file_version, = struct.unpack(b'>I', self.raw[36:40])
        self.reserved = self.raw[40:48]
        self.secondary_index_record, = struct.unpack(b'>I', self.raw[48:52])
        self.reserved2 = self.raw[52:80]
        self.first_non_book_record, = struct.unpack(b'>I', self.raw[80:84])
        self.fullname_offset, = struct.unpack(b'>I', self.raw[84:88])
        self.fullname_length, = struct.unpack(b'>I', self.raw[88:92])
        self.locale_raw, = struct.unpack(b'>I', self.raw[92:96])
        langcode = self.locale_raw
        langid = langcode & 0xFF
        sublangid = (langcode >> 10) & 0xFF
        self.language = main_language.get(langid, 'ENGLISH')
        self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')

        self.input_language = self.raw[96:100]
        # The misspelled attribute name is kept for backwards compatibility
        self.output_langauage = self.raw[100:104]
        self.min_version, = struct.unpack(b'>I', self.raw[104:108])
        self.first_image_index, = struct.unpack(b'>I', self.raw[108:112])
        self.huffman_record_offset, = struct.unpack(b'>I', self.raw[112:116])
        self.huffman_record_count, = struct.unpack(b'>I', self.raw[116:120])
        self.datp_record_offset, = struct.unpack(b'>I', self.raw[120:124])
        self.datp_record_count, = struct.unpack(b'>I', self.raw[124:128])
        self.exth_flags, = struct.unpack(b'>I', self.raw[128:132])
        self.has_exth = bool(self.exth_flags & 0x40)
        self.has_drm_data = self.length >= 174 and len(self.raw) >= 180
        if self.has_drm_data:
            self.unknown3 = self.raw[132:164]
            self.drm_offset, = struct.unpack(b'>I', self.raw[164:168])
            self.drm_count, = struct.unpack(b'>I', self.raw[168:172])
            self.drm_size, = struct.unpack(b'>I', self.raw[172:176])
            self.drm_flags = bin(struct.unpack(b'>I', self.raw[176:180])[0])
        self.has_extra_data_flags = self.length >= 232 and len(self.raw) >= 232+16
        self.has_fcis_flis = False
        self.has_multibytes = self.has_indexing_bytes = self.has_uncrossable_breaks = False
        self.extra_data_flags = 0
        if self.has_extra_data_flags:
            self.unknown4 = self.raw[180:192]
            self.first_content_record, self.last_content_record = \
                    struct.unpack(b'>HH', self.raw[192:196])
            self.unknown5, = struct.unpack(b'>I', self.raw[196:200])
            (self.fcis_number, self.fcis_count, self.flis_number,
                    self.flis_count) = struct.unpack(b'>IIII',
                            self.raw[200:216])
            self.unknown6 = self.raw[216:224]
            self.srcs_record_index = struct.unpack(b'>I',
                self.raw[224:228])[0]
            self.num_srcs_records = struct.unpack(b'>I',
                self.raw[228:232])[0]
            self.unknown7 = self.raw[232:240]
            self.extra_data_flags = struct.unpack(b'>I',
                self.raw[240:244])[0]
            self.has_multibytes = bool(self.extra_data_flags & 0b1)
            self.has_indexing_bytes = bool(self.extra_data_flags & 0b10)
            self.has_uncrossable_breaks = bool(self.extra_data_flags & 0b100)
            self.primary_index_record, = struct.unpack(b'>I',
                    self.raw[244:248])

        if self.has_exth:
            # The header length does not include the 16 bytes that precede
            # the MOBI identifier
            self.exth_offset = 16 + self.length

            self.exth = EXTHHeader(self.raw[self.exth_offset:])

            self.end_of_exth = self.exth_offset + self.exth.length
            self.bytes_after_exth = self.raw[self.end_of_exth:self.fullname_offset]

    def __str__(self):
        '''Return a multi-line, human readable dump of the parsed fields.'''
        ans = ['*'*20 + ' MOBI Header '+ '*'*20]
        ans.append('Compression: %s'%self.compression)
        ans.append('Unused: %r'%self.unused)
        ans.append('Number of text records: %d'%self.number_of_text_records)
        ans.append('Text record size: %d'%self.text_record_size)
        ans.append('Encryption: %s'%self.encryption_type)
        ans.append('Unknown: %r'%self.unknown)
        ans.append('Identifier: %r'%self.identifier)
        ans.append('Header length: %d'% self.length)
        ans.append('Type: %s'%self.type)
        ans.append('Encoding: %s'%self.encoding)
        ans.append('UID: %r'%self.uid)
        ans.append('File version: %d'%self.file_version)
        ans.append('Reserved: %r'%self.reserved)
        ans.append('Secondary index record: %d (null val: %d)'%(
            self.secondary_index_record, NULL_INDEX))
        ans.append('Reserved2: %r'%self.reserved2)
        ans.append('First non-book record (null value: %d): %d'%(NULL_INDEX,
            self.first_non_book_record))
        ans.append('Full name offset: %d'%self.fullname_offset)
        ans.append('Full name length: %d bytes'%self.fullname_length)
        ans.append('Langcode: %r'%self.locale_raw)
        ans.append('Language: %s'%self.language)
        ans.append('Sub language: %s'%self.sublanguage)
        ans.append('Input language: %r'%self.input_language)
        ans.append('Output language: %r'%self.output_langauage)
        ans.append('Min version: %d'%self.min_version)
        ans.append('First Image index: %d'%self.first_image_index)
        ans.append('Huffman record offset: %d'%self.huffman_record_offset)
        ans.append('Huffman record count: %d'%self.huffman_record_count)
        ans.append('DATP record offset: %r'%self.datp_record_offset)
        ans.append('DATP record count: %r'%self.datp_record_count)
        ans.append('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
        if self.has_drm_data:
            ans.append('Unknown3: %r'%self.unknown3)
            ans.append('DRM Offset: %s'%self.drm_offset)
            ans.append('DRM Count: %s'%self.drm_count)
            ans.append('DRM Size: %s'%self.drm_size)
            ans.append('DRM Flags: %r'%self.drm_flags)
        if self.has_extra_data_flags:
            ans.append('Unknown4: %r'%self.unknown4)
            ans.append('First content record: %d'% self.first_content_record)
            ans.append('Last content record: %d'% self.last_content_record)
            ans.append('Unknown5: %d'% self.unknown5)
            ans.append('FCIS number: %d'% self.fcis_number)
            ans.append('FCIS count: %d'% self.fcis_count)
            ans.append('FLIS number: %d'% self.flis_number)
            ans.append('FLIS count: %d'% self.flis_count)
            ans.append('Unknown6: %r'% self.unknown6)
            ans.append('SRCS record index: %d'%self.srcs_record_index)
            ans.append('Number of SRCS records?: %d'%self.num_srcs_records)
            ans.append('Unknown7: %r'%self.unknown7)
            ans.append(('Extra data flags: %s (has multibyte: %s) '
                '(has indexing: %s) (has uncrossable breaks: %s)')%(
                    bin(self.extra_data_flags), self.has_multibytes,
                    self.has_indexing_bytes, self.has_uncrossable_breaks ))
            ans.append('Primary index record (null value: %d): %d'%(NULL_INDEX,
                self.primary_index_record))

        ans = '\n'.join(ans)

        if self.has_exth:
            ans += '\n\n' + str(self.exth)
            ans += '\n\nBytes after EXTH (%d bytes): %s'%(
                    len(self.bytes_after_exth),
                    format_bytes(self.bytes_after_exth))

        ans += '\nNumber of bytes after full name: %d' % (len(self.raw) - (self.fullname_offset +
                self.fullname_length))

        ans += '\nRecord 0 length: %d'%len(self.raw)
        return ans
# }}}
|
|
||||||
|
|
||||||
class TagX(object): # {{{
|
class TagX(object): # {{{
|
||||||
|
|
||||||
@ -856,39 +473,6 @@ class CNCX(object): # {{{
|
|||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
class TextRecord(object): # {{{
    '''
    A text record split into its decompressed text (``raw``) and its
    trailing data entries (``trailing_data``).

    The trailing data keys 0, 1 and 2 are renamed to descriptive names, the
    untouched trailing bytes are stored under ``raw_bytes`` and any
    remaining integer keys are reported on stdout as unknown.
    '''

    def __init__(self, idx, record, extra_data_flags, decompress):
        self.trailing_data, body = get_trailing_data(record.raw, extra_data_flags)
        # Everything after the text body is trailing data
        raw_trailing_bytes = record.raw[len(body):]
        self.raw = decompress(body)

        known_types = (
            (0, 'multibyte_overlap'),
            (1, 'indexing'),
            (2, 'uncrossable_breaks'),
        )
        for flag, label in known_types:
            if flag in self.trailing_data:
                self.trailing_data[label] = self.trailing_data.pop(flag)
        self.trailing_data['raw_bytes'] = raw_trailing_bytes

        # Integer keys still present were not renamed above
        for typ, val in self.trailing_data.iteritems():
            if isinstance(typ, int):
                print ('Record %d has unknown trailing data of type: %d : %r'%
                        (idx, typ, val))

        self.idx = idx

    def dump(self, folder):
        '''
        Write the text to ``<idx>.txt`` and the trailing data entries to
        ``<idx>.trailing_data`` inside ``folder``, with the index zero
        padded to six digits.
        '''
        base = os.path.join(folder, '%06d'%self.idx)
        with open(base+'.txt', 'wb') as f:
            f.write(self.raw)
        with open(base+'.trailing_data', 'wb') as f:
            for key, value in self.trailing_data.iteritems():
                entry = '%s : %r\n\n'%(key, value)
                f.write(entry.encode('utf-8'))

# }}}
|
|
||||||
|
|
||||||
class ImageRecord(object): # {{{
|
class ImageRecord(object): # {{{
|
||||||
|
|
||||||
def __init__(self, idx, record, fmt):
|
def __init__(self, idx, record, fmt):
|
||||||
@ -1130,46 +714,10 @@ class TBSIndexing(object): # {{{
|
|||||||
|
|
||||||
class MOBIFile(object): # {{{
|
class MOBIFile(object): # {{{
|
||||||
|
|
||||||
def __init__(self, stream):
|
def __init__(self, mf):
|
||||||
self.raw = stream.read()
|
for x in ('raw', 'palmdb', 'record_headers', 'records', 'mobi_header',
|
||||||
|
'huffman_record_nums',):
|
||||||
self.palmdb = PalmDB(self.raw[:78])
|
setattr(self, x, getattr(mf, x))
|
||||||
|
|
||||||
self.record_headers = []
|
|
||||||
self.records = []
|
|
||||||
for i in xrange(self.palmdb.number_of_records):
|
|
||||||
pos = 78 + i * 8
|
|
||||||
offset, a1, a2, a3, a4 = struct.unpack(b'>LBBBB', self.raw[pos:pos+8])
|
|
||||||
flags, val = a1, a2 << 16 | a3 << 8 | a4
|
|
||||||
self.record_headers.append((offset, flags, val))
|
|
||||||
|
|
||||||
def section(section_number):
|
|
||||||
if section_number == self.palmdb.number_of_records - 1:
|
|
||||||
end_off = len(self.raw)
|
|
||||||
else:
|
|
||||||
end_off = self.record_headers[section_number + 1][0]
|
|
||||||
off = self.record_headers[section_number][0]
|
|
||||||
return self.raw[off:end_off]
|
|
||||||
|
|
||||||
for i in range(self.palmdb.number_of_records):
|
|
||||||
self.records.append(Record(section(i), self.record_headers[i]))
|
|
||||||
|
|
||||||
self.mobi_header = MOBIHeader(self.records[0])
|
|
||||||
self.huffman_record_nums = []
|
|
||||||
|
|
||||||
if 'huff' in self.mobi_header.compression.lower():
|
|
||||||
self.huffman_record_nums = list(xrange(self.mobi_header.huffman_record_offset,
|
|
||||||
self.mobi_header.huffman_record_offset +
|
|
||||||
self.mobi_header.huffman_record_count))
|
|
||||||
huffrecs = [self.records[r].raw for r in self.huffman_record_nums]
|
|
||||||
from calibre.ebooks.mobi.huffcdic import HuffReader
|
|
||||||
huffs = HuffReader(huffrecs)
|
|
||||||
decompress = huffs.unpack
|
|
||||||
elif 'palmdoc' in self.mobi_header.compression.lower():
|
|
||||||
from calibre.ebooks.compression.palmdoc import decompress_doc
|
|
||||||
decompress = decompress_doc
|
|
||||||
else:
|
|
||||||
decompress = lambda x: x
|
|
||||||
|
|
||||||
self.index_header = self.index_record = None
|
self.index_header = self.index_record = None
|
||||||
self.indexing_record_nums = set()
|
self.indexing_record_nums = set()
|
||||||
@ -1201,7 +749,7 @@ class MOBIFile(object): # {{{
|
|||||||
if fntbr == NULL_INDEX:
|
if fntbr == NULL_INDEX:
|
||||||
fntbr = len(self.records)
|
fntbr = len(self.records)
|
||||||
self.text_records = [TextRecord(r, self.records[r],
|
self.text_records = [TextRecord(r, self.records[r],
|
||||||
self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
|
self.mobi_header.extra_data_flags, mf.decompress6) for r in xrange(1,
|
||||||
min(len(self.records), ntr+1))]
|
min(len(self.records), ntr+1))]
|
||||||
self.image_records, self.binary_records = [], []
|
self.image_records, self.binary_records = [], []
|
||||||
self.font_records = []
|
self.font_records = []
|
||||||
@ -1241,17 +789,8 @@ class MOBIFile(object): # {{{
|
|||||||
print (str(self.mobi_header).encode('utf-8'), file=f)
|
print (str(self.mobi_header).encode('utf-8'), file=f)
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def inspect_mobi(path_or_stream, ddir=None): # {{{
|
def inspect_mobi(mobi_file, ddir):
|
||||||
stream = (path_or_stream if hasattr(path_or_stream, 'read') else
|
f = MOBIFile(mobi_file)
|
||||||
open(path_or_stream, 'rb'))
|
|
||||||
f = MOBIFile(stream)
|
|
||||||
if ddir is None:
|
|
||||||
ddir = 'decompiled_' + os.path.splitext(os.path.basename(stream.name))[0]
|
|
||||||
try:
|
|
||||||
shutil.rmtree(ddir)
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
os.makedirs(ddir)
|
|
||||||
with open(os.path.join(ddir, 'header.txt'), 'wb') as out:
|
with open(os.path.join(ddir, 'header.txt'), 'wb') as out:
|
||||||
f.print_header(f=out)
|
f.print_header(f=out)
|
||||||
|
|
||||||
@ -1262,13 +801,12 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{
|
|||||||
of.write(rec.raw)
|
of.write(rec.raw)
|
||||||
alltext += rec.raw
|
alltext += rec.raw
|
||||||
of.seek(0)
|
of.seek(0)
|
||||||
if f.mobi_header.file_version < 8:
|
|
||||||
root = html.fromstring(alltext.decode('utf-8'))
|
root = html.fromstring(alltext.decode('utf-8'))
|
||||||
with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
|
with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
|
||||||
of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
|
of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
|
||||||
include_meta_content_type=True))
|
include_meta_content_type=True))
|
||||||
|
|
||||||
|
|
||||||
if f.index_header is not None:
|
if f.index_header is not None:
|
||||||
f.index_record.alltext = alltext
|
f.index_record.alltext = alltext
|
||||||
with open(os.path.join(ddir, 'index.txt'), 'wb') as out:
|
with open(os.path.join(ddir, 'index.txt'), 'wb') as out:
|
||||||
@ -1295,13 +833,7 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{
|
|||||||
rec.dump(tdir)
|
rec.dump(tdir)
|
||||||
|
|
||||||
|
|
||||||
print ('Debug data saved to:', ddir)
|
|
||||||
|
|
||||||
# }}}
|
# }}}
|
||||||
|
|
||||||
def main():
|
|
||||||
inspect_mobi(sys.argv[1])
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
||||||
|
|
62
src/calibre/ebooks/mobi/debug/mobi8.py
Normal file
62
src/calibre/ebooks/mobi/debug/mobi8.py
Normal file
@ -0,0 +1,62 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
|
||||||
|
from __future__ import (unicode_literals, division, absolute_import,
|
||||||
|
print_function)
|
||||||
|
|
||||||
|
__license__ = 'GPL v3'
|
||||||
|
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
|
import sys, os
|
||||||
|
|
||||||
|
from calibre.ebooks.mobi.debug.headers import TextRecord
|
||||||
|
|
||||||
|
class MOBIFile(object):
    '''
    KF8 (MOBI 8) view of an already parsed MOBI file object ``mf``.

    Collects the resource records and the decompressed text records, and
    joins the text of the latter into ``raw_text``. For joint files the
    record positions are shifted by the KF8 header index taken from the
    EXTH header.
    '''

    def __init__(self, mf):
        self.mf = mf
        header6, header8 = mf.mobi_header, mf.mobi8_header
        kf8_offset = 0
        res_end = len(mf.records)
        if mf.kf8_type == 'joint':
            kf8_offset = header6.exth.kf8_header_index
            res_end = kf8_offset - 1

        self.resource_records = mf.records[header6.first_non_book_record:res_end]

        # Text records start one record after the (possibly shifted) header
        text_start = 1 + kf8_offset
        text_end = text_start + header8.number_of_text_records
        self.text_records = []
        for i, rec in enumerate(mf.records[text_start:text_end]):
            self.text_records.append(TextRecord(i, rec,
                header8.extra_data_flags, mf.decompress8))

        self.raw_text = b''.join(r.raw for r in self.text_records)

    def print_header(self, f=sys.stdout):
        '''
        Write the PalmDB header, the header line of every record and the
        MOBI 8 header to the file object ``f``.
        '''
        print (str(self.mf.palmdb).encode('utf-8'), file=f)
        print (file=f)
        print ('Record headers:', file=f)
        for number, record in enumerate(self.mf.records):
            print ('%6d. %s'%(number, record.header), file=f)

        print (file=f)
        print (str(self.mf.mobi8_header).encode('utf-8'), file=f)
|
||||||
|
|
||||||
|
|
||||||
|
def inspect_mobi(mobi_file, ddir):
    '''
    Write KF8 debug data for ``mobi_file`` into the existing directory
    ``ddir``: ``header.txt``, ``raw_text.html`` and one sub-directory per
    record category, into which every record of that category is dumped.
    '''
    f = MOBIFile(mobi_file)
    header_path = os.path.join(ddir, 'header.txt')
    with open(header_path, 'wb') as out:
        f.print_header(f=out)

    text_path = os.path.join(ddir, 'raw_text.html')
    with open(text_path, 'wb') as of:
        of.write(f.raw_text)

    # (sub-directory name, attribute of f holding the records). The
    # sub-directory is created even when f lacks the attribute.
    dump_targets = (
        ('text_records', 'text_records'),
        ('images', 'image_records'),
        ('binary', 'binary_records'),
        ('font', 'font_records'),
    )
    for dirname, attr in dump_targets:
        target = os.path.join(ddir, dirname)
        os.mkdir(target)
        for rec in getattr(f, attr, []):
            rec.dump(target)
|
||||||
|
|
||||||
|
|
@ -186,20 +186,16 @@ class BookHeader(object):
|
|||||||
if len(raw) >= 0xF8:
|
if len(raw) >= 0xF8:
|
||||||
self.ncxidx, = struct.unpack_from(b'>L', raw, 0xF4)
|
self.ncxidx, = struct.unpack_from(b'>L', raw, 0xF4)
|
||||||
|
|
||||||
if self.mobi_version >= 8:
|
# Ancient PRC files from Baen can have random values for
|
||||||
self.skelidx, = struct.unpack_from('>L', raw, 0xFC)
|
# mobi_version, so be conservative
|
||||||
|
if self.mobi_version == 8 and len(raw) >= (0xF8 + 16):
|
||||||
# Index into <div> sections in raw_ml
|
self.dividx, self.skelidx, self.datpidx, self.othidx = \
|
||||||
self.dividx, = struct.unpack_from('>L', raw, 0xF8)
|
struct.unpack_from(b'>4L', raw, 0xF8)
|
||||||
|
|
||||||
# Index into Other files
|
|
||||||
self.othidx, = struct.unpack_from('>L', raw, 0x104)
|
|
||||||
|
|
||||||
# need to use the FDST record to find out how to properly
|
# need to use the FDST record to find out how to properly
|
||||||
# unpack the raw_ml into pieces it is simply a table of start
|
# unpack the raw_ml into pieces it is simply a table of start
|
||||||
# and end locations for each flow piece
|
# and end locations for each flow piece
|
||||||
self.fdstidx, = struct.unpack_from('>L', raw, 0xC0)
|
self.fdstidx, self.fdstcnt = struct.unpack_from(b'>2L', raw, 0xC0)
|
||||||
self.fdstcnt, = struct.unpack_from('>L', raw, 0xC4)
|
|
||||||
# if cnt is 1 or less, fdst section number can be garbage
|
# if cnt is 1 or less, fdst section number can be garbage
|
||||||
if self.fdstcnt <= 1:
|
if self.fdstcnt <= 1:
|
||||||
self.fdstidx = NULL_INDEX
|
self.fdstidx = NULL_INDEX
|
||||||
|
@ -33,9 +33,11 @@ def update_internal_links(mobi8_reader):
|
|||||||
for m in posfid_index_pattern.finditer(tag):
|
for m in posfid_index_pattern.finditer(tag):
|
||||||
posfid = m.group(1)
|
posfid = m.group(1)
|
||||||
offset = m.group(2)
|
offset = m.group(2)
|
||||||
filename, idtag = mr.get_id_tag_by_pos_fid(posfid, offset)
|
filename, idtag = mr.get_id_tag_by_pos_fid(int(posfid, 32),
|
||||||
|
int(offset, 32))
|
||||||
suffix = (b'#' + idtag) if idtag else b''
|
suffix = (b'#' + idtag) if idtag else b''
|
||||||
replacement = filename.encode(mr.header.codec) + suffix
|
replacement = filename.split('/')[-1].encode(
|
||||||
|
mr.header.codec) + suffix
|
||||||
tag = posfid_index_pattern.sub(replacement, tag, 1)
|
tag = posfid_index_pattern.sub(replacement, tag, 1)
|
||||||
srcpieces[j] = tag
|
srcpieces[j] = tag
|
||||||
part = ''.join([x.decode(mr.header.codec) for x in srcpieces])
|
part = ''.join([x.decode(mr.header.codec) for x in srcpieces])
|
||||||
|
@ -107,7 +107,10 @@ class MobiReader(object):
|
|||||||
self.kf8_type = None
|
self.kf8_type = None
|
||||||
k8i = getattr(self.book_header.exth, 'kf8_header', None)
|
k8i = getattr(self.book_header.exth, 'kf8_header', None)
|
||||||
|
|
||||||
if self.book_header.mobi_version == 8:
|
# Ancient PRC files from Baen can have random values for
|
||||||
|
# mobi_version, so be conservative
|
||||||
|
if (self.book_header.mobi_version == 8 and hasattr(self.book_header,
|
||||||
|
'skelidx')):
|
||||||
self.kf8_type = 'standalone'
|
self.kf8_type = 'standalone'
|
||||||
elif k8i is not None: # Check for joint mobi 6 and kf 8 file
|
elif k8i is not None: # Check for joint mobi 6 and kf 8 file
|
||||||
try:
|
try:
|
||||||
@ -118,12 +121,17 @@ class MobiReader(object):
|
|||||||
try:
|
try:
|
||||||
self.book_header = BookHeader(self.sections[k8i][0],
|
self.book_header = BookHeader(self.sections[k8i][0],
|
||||||
self.ident, user_encoding, self.log)
|
self.ident, user_encoding, self.log)
|
||||||
# The following are only correct in the Mobi 6
|
|
||||||
# header not the Mobi 8 header
|
# Only the first_image_index from the MOBI 6 header is
|
||||||
|
# useful
|
||||||
for x in ('first_image_index',):
|
for x in ('first_image_index',):
|
||||||
setattr(self.book_header, x, getattr(bh, x))
|
setattr(self.book_header, x, getattr(bh, x))
|
||||||
|
|
||||||
|
# We need to do this because the MOBI 6 text extract code
|
||||||
|
# does not know anything about the kf8 offset
|
||||||
if hasattr(self.book_header, 'huff_offset'):
|
if hasattr(self.book_header, 'huff_offset'):
|
||||||
self.book_header.huff_offset += k8i
|
self.book_header.huff_offset += k8i
|
||||||
|
|
||||||
self.kf8_type = 'joint'
|
self.kf8_type = 'joint'
|
||||||
self.kf8_boundary = k8i-1
|
self.kf8_boundary = k8i-1
|
||||||
except:
|
except:
|
||||||
|
@ -230,11 +230,9 @@ class Mobi8Reader(object):
|
|||||||
|
|
||||||
def get_id_tag_by_pos_fid(self, posfid, offset):
|
def get_id_tag_by_pos_fid(self, posfid, offset):
|
||||||
# first convert kindle:pos:fid and offset info to position in file
|
# first convert kindle:pos:fid and offset info to position in file
|
||||||
row = int(posfid, 32)
|
insertpos, idtext, filenum, seqnm, startpos, length = self.elems[posfid]
|
||||||
off = int(offset, 32)
|
pos = insertpos + offset
|
||||||
[insertpos, idtext, filenum, seqnm, startpos, length] = self.elems[row]
|
fi = self.get_file_info(pos)
|
||||||
pos = insertpos + off
|
|
||||||
fname = self.get_file_info(pos).filename
|
|
||||||
# an existing "id=" must exist in original xhtml otherwise it would not
|
# an existing "id=" must exist in original xhtml otherwise it would not
|
||||||
# have worked for linking. Amazon seems to have added its own
|
# have worked for linking. Amazon seems to have added its own
|
||||||
# additional "aid=" inside tags whose contents seem to represent some
|
# additional "aid=" inside tags whose contents seem to represent some
|
||||||
@ -243,7 +241,7 @@ class Mobi8Reader(object):
|
|||||||
# so find the closest "id=" before position the file by actually
|
# so find the closest "id=" before position the file by actually
|
||||||
# searching in that file
|
# searching in that file
|
||||||
idtext = self.get_id_tag(pos)
|
idtext = self.get_id_tag(pos)
|
||||||
return fname, idtext
|
return '%s/%s'%(fi.type, fi.filename), idtext
|
||||||
|
|
||||||
def get_id_tag(self, pos):
|
def get_id_tag(self, pos):
|
||||||
# find the correct tag by actually searching in the destination
|
# find the correct tag by actually searching in the destination
|
||||||
@ -254,12 +252,13 @@ class Mobi8Reader(object):
|
|||||||
textblock = self.parts[fi.num]
|
textblock = self.parts[fi.num]
|
||||||
id_map = []
|
id_map = []
|
||||||
npos = pos - fi.start
|
npos = pos - fi.start
|
||||||
# if npos inside a tag then search all text before the its end of tag
|
|
||||||
# marker
|
|
||||||
pgt = textblock.find(b'>', npos)
|
pgt = textblock.find(b'>', npos)
|
||||||
plt = textblock.find(b'<', npos)
|
plt = textblock.find(b'<', npos)
|
||||||
if pgt < plt:
|
# if npos is inside a tag then search all text before its end of tag marker
|
||||||
|
# else not in a tag need to search the preceding tag
|
||||||
|
if plt == npos or pgt < plt:
|
||||||
npos = pgt + 1
|
npos = pgt + 1
|
||||||
|
textblock = textblock[0:npos]
|
||||||
# find id links only inside of tags
|
# find id links only inside of tags
|
||||||
# inside any < > pair find all "id=' and return whatever is inside
|
# inside any < > pair find all "id=' and return whatever is inside
|
||||||
# the quotes
|
# the quotes
|
||||||
@ -316,13 +315,18 @@ class Mobi8Reader(object):
|
|||||||
|
|
||||||
# Add href and anchor info to the index entries
|
# Add href and anchor info to the index entries
|
||||||
for entry in index_entries:
|
for entry in index_entries:
|
||||||
|
pos_fid = entry['pos_fid']
|
||||||
|
if pos_fid is None:
|
||||||
pos = entry['pos']
|
pos = entry['pos']
|
||||||
fi = self.get_file_info(pos)
|
fi = self.get_file_info(pos)
|
||||||
#print (11111111, fi, entry['pos_fid'])
|
|
||||||
if fi.filename is None:
|
if fi.filename is None:
|
||||||
raise ValueError('Index entry has invalid pos: %d'%pos)
|
raise ValueError('Index entry has invalid pos: %d'%pos)
|
||||||
idtag = self.get_id_tag(pos).decode(self.header.codec)
|
idtag = self.get_id_tag(pos).decode(self.header.codec)
|
||||||
entry['href'] = '%s/%s'%(fi.type, fi.filename)
|
href = '%s/%s'%(fi.type, fi.filename)
|
||||||
|
else:
|
||||||
|
href, idtag = self.get_id_tag_by_pos_fid(*pos_fid)
|
||||||
|
|
||||||
|
entry['href'] = href
|
||||||
entry['idtag'] = idtag
|
entry['idtag'] = idtag
|
||||||
|
|
||||||
# Build the TOC object
|
# Build the TOC object
|
||||||
|
@ -70,6 +70,9 @@ class AddAction(InterfaceAction):
|
|||||||
self.add_menu.addSeparator()
|
self.add_menu.addSeparator()
|
||||||
ma('add-formats', _('Add files to selected book records'),
|
ma('add-formats', _('Add files to selected book records'),
|
||||||
triggered=self.add_formats, shortcut=_('Shift+A'))
|
triggered=self.add_formats, shortcut=_('Shift+A'))
|
||||||
|
self.add_menu.addSeparator()
|
||||||
|
ma('add-config', _('Configure the adding of books'),
|
||||||
|
triggered=self.add_config)
|
||||||
|
|
||||||
self.qaction.triggered.connect(self.add_books)
|
self.qaction.triggered.connect(self.add_books)
|
||||||
|
|
||||||
@ -78,6 +81,11 @@ class AddAction(InterfaceAction):
|
|||||||
for action in list(self.add_menu.actions())[1:]:
|
for action in list(self.add_menu.actions())[1:]:
|
||||||
action.setEnabled(enabled)
|
action.setEnabled(enabled)
|
||||||
|
|
||||||
|
def add_config(self):
|
||||||
|
self.gui.iactions['Preferences'].do_config(
|
||||||
|
initial_plugin=('Import/Export', 'Adding'),
|
||||||
|
close_after_initial=True)
|
||||||
|
|
||||||
def add_formats(self, *args):
|
def add_formats(self, *args):
|
||||||
if self.gui.stack.currentIndex() != 0:
|
if self.gui.stack.currentIndex() != 0:
|
||||||
return
|
return
|
||||||
|
@ -591,6 +591,21 @@ def educateQuotes(str):
|
|||||||
str = re.sub(r'''""''', """””""", str)
|
str = re.sub(r'''""''', """””""", str)
|
||||||
str = re.sub(r"""''""", """’’""", str)
|
str = re.sub(r"""''""", """’’""", str)
|
||||||
|
|
||||||
|
# Special case for quotes inside of other entities, e.g.:
|
||||||
|
# <p>A double quote--"within dashes"--would be nice.</p>
|
||||||
|
str = re.sub(r"""(?<=\W)"(?=\w)""", r"""“""", str)
|
||||||
|
str = re.sub(r"""(?<=\W)'(?=\w)""", r"""‘""", str)
|
||||||
|
str = re.sub(r"""(?<=\w)"(?=\W)""", r"""”""", str)
|
||||||
|
str = re.sub(r"""(?<=\w)'(?=\W)""", r"""’""", str)
|
||||||
|
|
||||||
|
# Special case for Quotes at end of line with a preceding space (may change just to end of line)
|
||||||
|
str = re.sub(r"""(?<=\s)"$""", r"""”""", str)
|
||||||
|
str = re.sub(r"""(?<=\s)'$""", r"""’""", str)
|
||||||
|
|
||||||
|
# Special case for Quotes at beginning of line with a space - multiparagraph quoted text:
|
||||||
|
str = re.sub(r"""^"(?=\s)""", r"""“""", str)
|
||||||
|
str = re.sub(r"""^'(?=\s)""", r"""‘""", str)
|
||||||
|
|
||||||
# Special case for decade abbreviations (the '80s):
|
# Special case for decade abbreviations (the '80s):
|
||||||
str = re.sub(r"""\b'(?=\d{2}s)""", r"""’""", str)
|
str = re.sub(r"""\b'(?=\d{2}s)""", r"""’""", str)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user