Merge from trunk

This commit is contained in:
Charles Haley 2012-03-20 19:01:20 +01:00
commit ab19edb96f
36 changed files with 2062 additions and 1782 deletions

View File

@ -3,10 +3,11 @@ from calibre.web.feeds.news import BasicNewsRecipe
class AdvancedUserRecipe1325006965(BasicNewsRecipe):
title = u'FHM UK'
description = 'Good News for Men'
cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
cover_url = 'http://www.greatmagazines.co.uk/covers/large/w197/current/fhm.jpg'
# cover_url = 'http://profile.ak.fbcdn.net/hprofile-ak-snc4/373529_38324934806_64930243_n.jpg'
masthead_url = 'http://www.fhm.com/App_Resources/Images/Site/re-design/logo.gif'
__author__ = 'Dave Asbury'
# last updated 27/1/12
# last updated 17/3/12
language = 'en_GB'
oldest_article = 28
max_articles_per_feed = 12
@ -29,6 +30,8 @@ class AdvancedUserRecipe1325006965(BasicNewsRecipe):
feeds = [
(u'From the Homepage',u'http://feed43.com/8053226782885416.xml'),
(u'Funny - The Very Best Of The Internet',u'http://feed43.com/4538510106331565.xml'),
(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
(u'Gaming',u'http://feed43.com/0755006465351035.xml'),
(u'Upgrade',u'http://feed43.com/0877305847443234.xml'),
#(u'The Final Countdown', u'http://feed43.com/3576106158530118.xml'),
#(u'Gaming',u'http://feed43.com/0755006465351035.xml'),
(u'Gaming',u'http://feed43.com/6537162612465672.xml'),
]

View File

@ -0,0 +1,43 @@
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
ivanamilakovic.blogspot.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class IvanaMilakovic(BasicNewsRecipe):
title = u'Ivana Milaković'
__author__ = 'Darko Miletic'
description = u'Hronika mačijeg škrabala - priče, inspiracija, knjige, pisanje, prevodi...'
oldest_article = 80
max_articles_per_feed = 100
language = 'sr'
encoding = 'utf-8'
no_stylesheets = True
use_embedded_content = True
publication_type = 'blog'
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Arial,Tahoma,Helvetica,FreeSans,sans1,sans-serif}
img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px }
"""
conversion_options = {
'comment' : description
, 'tags' : 'knjige, blog, srbija, sf'
, 'publisher': 'Ivana Milakovic'
, 'language' : language
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
feeds = [(u'Posts', u'http://ivanamilakovic.blogspot.com/feeds/posts/default')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return self.adeify_images(soup)

42
recipes/klubknjige.recipe Normal file
View File

@ -0,0 +1,42 @@
__license__ = 'GPL v3'
__copyright__ = '2012, Darko Miletic <darko.miletic at gmail.com>'
'''
klub-knjige.blogspot.com
'''
import re
from calibre.web.feeds.news import BasicNewsRecipe
class KlubKnjige(BasicNewsRecipe):
title = 'Klub knjige'
__author__ = 'Darko Miletic'
description = 'literarni blog'
oldest_article = 30
max_articles_per_feed = 100
language = 'sr'
encoding = 'utf-8'
no_stylesheets = True
use_embedded_content = True
publication_type = 'blog'
extra_css = """
@font-face {font-family: "sans1";src:url(res:///opt/sony/ebook/FONT/tt0003m_.ttf)}
body{font-family: Arial,Tahoma,Helvetica,FreeSans,sans1,sans-serif}
img{margin-bottom: 0.8em; border: 1px solid #333333; padding: 4px }
"""
conversion_options = {
'comment' : description
, 'tags' : 'knjige, blog, srbija, sf'
, 'publisher': 'Klub Knjige'
, 'language' : language
}
preprocess_regexps = [(re.compile(u'\u0110'), lambda match: u'\u00D0')]
feeds = [(u'Posts', u'http://klub-knjige.blogspot.com/feeds/posts/default')]
def preprocess_html(self, soup):
for item in soup.findAll(style=True):
del item['style']
return self.adeify_images(soup)

View File

@ -3,7 +3,6 @@ __copyright__ = '2011'
'''
lemonde.fr
'''
import re
from calibre.web.feeds.recipes import BasicNewsRecipe
class LeMonde(BasicNewsRecipe):
@ -41,77 +40,8 @@ class LeMonde(BasicNewsRecipe):
remove_empty_feeds = True
filterDuplicates = True
auto_cleanup = True
def preprocess_html(self, soup):
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return self.adeify_images(soup)
preprocess_regexps = [
(re.compile(r'([0-9])%'), lambda m: m.group(1) + '&nbsp;%'),
(re.compile(r'([0-9])([0-9])([0-9]) ([0-9])([0-9])([0-9])'), lambda m: m.group(1) + m.group(2) + m.group(3) + '&nbsp;' + m.group(4) + m.group(5) + m.group(6)),
(re.compile(r'([0-9]) ([0-9])([0-9])([0-9])'), lambda m: m.group(1) + '&nbsp;' + m.group(2) + m.group(3) + m.group(4)),
(re.compile(r'<span>'), lambda match: ' <span>'),
(re.compile(r'\("'), lambda match: '(&laquo;&nbsp;'),
(re.compile(r'"\)'), lambda match: '&nbsp;&raquo;)'),
(re.compile(r'&ldquo;'), lambda match: '(&laquo;&nbsp;'),
(re.compile(r'&rdquo;'), lambda match: '&nbsp;&raquo;)'),
(re.compile(r'>\''), lambda match: '>&lsquo;'),
(re.compile(r' \''), lambda match: ' &lsquo;'),
(re.compile(r'\''), lambda match: '&rsquo;'),
(re.compile(r'"<em>'), lambda match: '<em>&laquo;&nbsp;'),
(re.compile(r'"<em>"</em><em>'), lambda match: '<em>&laquo;&nbsp;'),
(re.compile(r'"<a href='), lambda match: '&laquo;&nbsp;<a href='),
(re.compile(r'</em>"'), lambda match: '&nbsp;&raquo;</em>'),
(re.compile(r'</a>"'), lambda match: '&nbsp;&raquo;</a>'),
(re.compile(r'"</'), lambda match: '&nbsp;&raquo;</'),
(re.compile(r'>"'), lambda match: '>&laquo;&nbsp;'),
(re.compile(r'"<'), lambda match: '&nbsp;&raquo;<'),
(re.compile(r'&rsquo;"'), lambda match: '&rsquo;«&nbsp;'),
(re.compile(r' "'), lambda match: ' &laquo;&nbsp;'),
(re.compile(r'" '), lambda match: '&nbsp;&raquo; '),
(re.compile(r'"\.'), lambda match: '&nbsp;&raquo;.'),
(re.compile(r'",'), lambda match: '&nbsp;&raquo;,'),
(re.compile(r'"\?'), lambda match: '&nbsp;&raquo;?'),
(re.compile(r'":'), lambda match: '&nbsp;&raquo;:'),
(re.compile(r'";'), lambda match: '&nbsp;&raquo;;'),
(re.compile(r'"\!'), lambda match: '&nbsp;&raquo;!'),
(re.compile(r' :'), lambda match: '&nbsp;:'),
(re.compile(r' ;'), lambda match: '&nbsp;;'),
(re.compile(r' \?'), lambda match: '&nbsp;?'),
(re.compile(r' \!'), lambda match: '&nbsp;!'),
(re.compile(r'\s»'), lambda match: '&nbsp;»'),
(re.compile(r'«\s'), lambda match: '«&nbsp;'),
(re.compile(r' %'), lambda match: '&nbsp;%'),
(re.compile(r'\.jpg&nbsp;&raquo; border='), lambda match: '.jpg'),
(re.compile(r'\.png&nbsp;&raquo; border='), lambda match: '.png'),
(re.compile(r' &ndash; '), lambda match: '&nbsp;&ndash; '),
(re.compile(r' '), lambda match: '&nbsp;&ndash; '),
(re.compile(r' - '), lambda match: '&nbsp;&ndash; '),
(re.compile(r' -,'), lambda match: '&nbsp;&ndash;,'),
(re.compile(r'&raquo;:'), lambda match: '&raquo;&nbsp;:'),
]
keep_only_tags = [
dict(name='div', attrs={'class':['contenu']})
]
remove_tags = [dict(name='div', attrs={'class':['LM_atome']})]
remove_tags_after = [dict(id='appel_temoignage')]
def get_article_url(self, article):
url = article.get('guid', None)
if '/chat/' in url or '.blog' in url or '/video/' in url or '/sport/' in url or '/portfolio/' in url or '/visuel/' in url :
url = None
return url
# def get_article_url(self, article):
# link = article.get('link')
# if 'blog' not in link and ('chat' not in link):
# return link
feeds = [
('A la une', 'http://www.lemonde.fr/rss/une.xml'),
@ -137,3 +67,10 @@ class LeMonde(BasicNewsRecipe):
return cover_url
def get_article_url(self, article):
url = article.get('guid', None)
if '/chat/' in url or '.blog' in url or '/video/' in url or '/sport/' in url or '/portfolio/' in url or '/visuel/' in url :
url = None
return url

View File

@ -14,7 +14,7 @@ from setup.build_environment import msvc, MT, RC
from setup.installer.windows.wix import WixMixIn
OPENSSL_DIR = r'Q:\openssl'
QT_DIR = 'Q:\\Qt\\4.7.3'
QT_DIR = 'Q:\\Qt\\4.8.0'
QT_DLLS = ['Core', 'Gui', 'Network', 'Svg', 'WebKit', 'Xml', 'XmlPatterns']
LIBUNRAR = 'C:\\Program Files\\UnrarDLL\\unrar.dll'
SW = r'C:\cygwin\home\kovid\sw'

View File

@ -97,7 +97,9 @@ Now, run configure and make::
-no-plugin-manifests is needed so that loading the plugins does not fail looking for the CRT assembly
configure -opensource -release -qt-zlib -qt-gif -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -no-plugin-manifests -openssl -I Q:\openssl\include -L Q:\openssl\lib && nmake
configure -opensource -release -qt-zlib -qt-libmng -qt-libpng -qt-libtiff -qt-libjpeg -release -platform win32-msvc2008 -no-qt3support -webkit -xmlpatterns -no-phonon -no-style-plastique -no-style-cleanlooks -no-style-motif -no-style-cde -no-declarative -no-scripttools -no-audio-backend -no-multimedia -no-dbus -no-openvg -no-opengl -no-qt3support -confirm-license -nomake examples -nomake demos -nomake docs -no-plugin-manifests -openssl -I Q:\openssl\include -L Q:\openssl\lib && nmake
Add the path to the bin folder inside the Qt dir to your system PATH.
SIP
-----

View File

@ -381,12 +381,15 @@ def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
user_agent = USER_AGENT_MOBILE if mobile_browser else USER_AGENT
opener.addheaders = [('User-agent', user_agent)]
proxies = get_proxies()
to_add = {}
http_proxy = proxies.get('http', None)
if http_proxy:
opener.set_proxies({'http':http_proxy})
to_add['http'] = http_proxy
https_proxy = proxies.get('https', None)
if https_proxy:
opener.set_proxies({'https':https_proxy})
to_add['https'] = https_proxy
if to_add:
opener.set_proxies(to_add)
return opener

View File

@ -625,7 +625,8 @@ from calibre.devices.eb600.driver import (EB600, COOL_ER, SHINEBOOK,
POCKETBOOK701, POCKETBOOK360P, PI2)
from calibre.devices.iliad.driver import ILIAD
from calibre.devices.irexdr.driver import IREXDR1000, IREXDR800
from calibre.devices.jetbook.driver import JETBOOK, MIBUK, JETBOOK_MINI
from calibre.devices.jetbook.driver import (JETBOOK, MIBUK, JETBOOK_MINI,
JETBOOK_COLOR)
from calibre.devices.kindle.driver import (KINDLE, KINDLE2, KINDLE_DX,
KINDLE_FIRE)
from calibre.devices.nook.driver import NOOK, NOOK_COLOR
@ -664,9 +665,7 @@ plugins += [
ILIAD,
IREXDR1000,
IREXDR800,
JETBOOK,
JETBOOK_MINI,
MIBUK,
JETBOOK, JETBOOK_MINI, MIBUK, JETBOOK_COLOR,
SHINEBOOK,
POCKETBOOK360, POCKETBOOK301, POCKETBOOK602, POCKETBOOK701, POCKETBOOK360P,
PI2,

View File

@ -234,7 +234,7 @@ def main(args=sys.argv):
sql_dump = args[-1]
reinit_db(opts.reinitialize_db, sql_dump=sql_dump)
elif opts.inspect_mobi:
from calibre.ebooks.mobi.debug import inspect_mobi
from calibre.ebooks.mobi.debug.main import inspect_mobi
for path in args[1:]:
prints('Inspecting:', path)
inspect_mobi(path)

View File

@ -125,4 +125,29 @@ class JETBOOK_MINI(USBMS):
SUPPORTS_SUB_DIRS = True
class JETBOOK_COLOR(USBMS):
'''
set([(u'0x951',
u'0x160b',
u'0x0',
u'Freescale',
u'Mass Storage Device',
u'0802270905553')])
'''
FORMATS = ['epub', 'mobi', 'prc', 'fb2', 'rtf', 'txt', 'pdf', 'djvu']
gui_name = 'JetBook Color'
name = 'JetBook Color Device Interface'
description = _('Communicate with the JetBook Color reader.')
author = 'Kovid Goyal'
VENDOR_ID = [0x951]
PRODUCT_ID = [0x160b]
BCD = [0x0]
EBOOK_DIR_MAIN = 'My Books'
SUPPORTS_SUB_DIRS = True

View File

@ -27,7 +27,7 @@ class PRS505(USBMS):
booklist_class = CollectionsBookList
FORMATS = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt']
FORMATS = ['epub', 'lrf', 'lrx', 'rtf', 'pdf', 'txt', 'zbf']
CAN_SET_METADATA = ['title', 'authors', 'collections']
CAN_DO_DEVICE_DB_PLUGBOARD = True

View File

@ -190,12 +190,22 @@ class EPUBOutput(OutputFormatPlugin):
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode(x).startswith('urn:uuid:'):
uuid = unicode(x).split(':')[-1]
break
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
if uuid is None:
self.log.warn('No UUID identifier found')
from uuid import uuid4
uuid = str(uuid4())
oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)
if encrypted_fonts and not uuid.startswith('urn:uuid:'):
# Apparently ADE requires this value to start with urn:uuid:
# for some absurd reason, or it will throw a hissy fit and refuse
# to use the obfuscated fonts.
for x in identifiers:
if unicode(x) == uuid:
x.content = 'urn:uuid:'+uuid
with TemporaryDirectory(u'_epub_output') as tdir:
from calibre.customize.ui import plugin_for_output_format
metadata_xml = None
@ -210,7 +220,6 @@ class EPUBOutput(OutputFormatPlugin):
opf = [x for x in os.listdir(tdir) if x.endswith('.opf')][0]
self.condense_ncx([os.path.join(tdir, x) for x in os.listdir(tdir)\
if x.endswith('.ncx')][0])
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
encryption = None
if encrypted_fonts:
encryption = self.encrypt_fonts(encrypted_fonts, tdir, uuid)

View File

@ -59,7 +59,10 @@ class MOBIInput(InputFormatPlugin):
if mr.kf8_type is not None:
log('Found KF8 MOBI of type %r'%mr.kf8_type)
from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
return os.path.abspath(Mobi8Reader(mr, log)())
mr = Mobi8Reader(mr, log)
opf = os.path.abspath(mr())
self.encrypted_fonts = mr.encrypted_fonts
return opf
raw = parse_cache.pop('calibre_raw_mobi_markup', False)
if raw:

View File

@ -179,7 +179,7 @@ class MOBIOutput(OutputFormatPlugin):
writer(oeb, output_path)
if opts.extract_to is not None:
from calibre.ebooks.mobi.debug import inspect_mobi
from calibre.ebooks.mobi.debug.main import inspect_mobi
ddir = opts.extract_to
inspect_mobi(output_path, ddir=ddir)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,16 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
def format_bytes(byts):
byts = bytearray(byts)
byts = [hex(b)[2:] for b in byts]
return ' '.join(byts)

View File

@ -0,0 +1,535 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct, datetime, os
from calibre.utils.date import utc_tz
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.langcodes import main_language, sub_language
from calibre.ebooks.mobi.debug import format_bytes
from calibre.ebooks.mobi.utils import get_trailing_data
# PalmDB {{{
class PalmDOCAttributes(object):
class Attr(object):
def __init__(self, name, field, val):
self.name = name
self.val = val & field
def __str__(self):
return '%s: %s'%(self.name, bool(self.val))
def __init__(self, raw):
self.val = struct.unpack(b'<H', raw)[0]
self.attributes = []
for name, field in [('Read Only', 0x02), ('Dirty AppInfoArea', 0x04),
('Backup this database', 0x08),
('Okay to install newer over existing copy, if present on PalmPilot', 0x10),
('Force the PalmPilot to reset after this database is installed', 0x12),
('Don\'t allow copy of file to be beamed to other Pilot',
0x14)]:
self.attributes.append(PalmDOCAttributes.Attr(name, field,
self.val))
def __str__(self):
attrs = '\n\t'.join([str(x) for x in self.attributes])
return 'PalmDOC Attributes: %s\n\t%s'%(bin(self.val), attrs)
class PalmDB(object):
def __init__(self, raw):
self.raw = raw
if self.raw.startswith(b'TPZ'):
raise ValueError('This is a Topaz file')
self.name = self.raw[:32].replace(b'\x00', b'')
self.attributes = PalmDOCAttributes(self.raw[32:34])
self.version = struct.unpack(b'>H', self.raw[34:36])[0]
palm_epoch = datetime.datetime(1904, 1, 1, tzinfo=utc_tz)
self.creation_date_raw = struct.unpack(b'>I', self.raw[36:40])[0]
self.creation_date = (palm_epoch +
datetime.timedelta(seconds=self.creation_date_raw))
self.modification_date_raw = struct.unpack(b'>I', self.raw[40:44])[0]
self.modification_date = (palm_epoch +
datetime.timedelta(seconds=self.modification_date_raw))
self.last_backup_date_raw = struct.unpack(b'>I', self.raw[44:48])[0]
self.last_backup_date = (palm_epoch +
datetime.timedelta(seconds=self.last_backup_date_raw))
self.modification_number = struct.unpack(b'>I', self.raw[48:52])[0]
self.app_info_id = self.raw[52:56]
self.sort_info_id = self.raw[56:60]
self.type = self.raw[60:64]
self.creator = self.raw[64:68]
self.ident = self.type + self.creator
if self.ident not in (b'BOOKMOBI', b'TEXTREAD'):
raise ValueError('Unknown book ident: %r'%self.ident)
self.last_record_uid, = struct.unpack(b'>I', self.raw[68:72])
self.next_rec_list_id = self.raw[72:76]
self.number_of_records, = struct.unpack(b'>H', self.raw[76:78])
def __str__(self):
ans = ['*'*20 + ' PalmDB Header '+ '*'*20]
ans.append('Name: %r'%self.name)
ans.append(str(self.attributes))
ans.append('Version: %s'%self.version)
ans.append('Creation date: %s (%s)'%(self.creation_date.isoformat(),
self.creation_date_raw))
ans.append('Modification date: %s (%s)'%(self.modification_date.isoformat(),
self.modification_date_raw))
ans.append('Backup date: %s (%s)'%(self.last_backup_date.isoformat(),
self.last_backup_date_raw))
ans.append('Modification number: %s'%self.modification_number)
ans.append('App Info ID: %r'%self.app_info_id)
ans.append('Sort Info ID: %r'%self.sort_info_id)
ans.append('Type: %r'%self.type)
ans.append('Creator: %r'%self.creator)
ans.append('Last record UID +1: %r'%self.last_record_uid)
ans.append('Next record list id: %r'%self.next_rec_list_id)
ans.append('Number of records: %s'%self.number_of_records)
return '\n'.join(ans)
# }}}
class Record(object): # {{{
def __init__(self, raw, header):
self.offset, self.flags, self.uid = header
self.raw = raw
@property
def header(self):
return 'Offset: %d Flags: %d UID: %d First 4 bytes: %r Size: %d'%(self.offset, self.flags,
self.uid, self.raw[:4], len(self.raw))
# }}}
# EXTH {{{
class EXTHRecord(object):
def __init__(self, type_, data):
self.type = type_
self.data = data
self.name = {
1 : 'DRM Server id',
2 : 'DRM Commerce id',
3 : 'DRM ebookbase book id',
100 : 'author',
101 : 'publisher',
102 : 'imprint',
103 : 'description',
104 : 'isbn',
105 : 'subject',
106 : 'publishingdate',
107 : 'review',
108 : 'contributor',
109 : 'rights',
110 : 'subjectcode',
111 : 'type',
112 : 'source',
113 : 'asin',
114 : 'versionnumber',
115 : 'sample',
116 : 'startreading',
117 : 'adult',
118 : 'retailprice',
119 : 'retailpricecurrency',
121 : 'KF8 header section index',
125 : 'KF8 resources (images/fonts) count',
129 : 'KF8 cover URI',
131 : 'KF8 unknown count',
201 : 'coveroffset',
202 : 'thumboffset',
203 : 'hasfakecover',
204 : 'Creator Software',
205 : 'Creator Major Version', # '>I'
206 : 'Creator Minor Version', # '>I'
207 : 'Creator Build Number', # '>I'
208 : 'watermark',
209 : 'tamper_proof_keys',
300 : 'fontsignature',
301 : 'clippinglimit', # percentage '>B'
402 : 'publisherlimit',
404 : 'TTS flag', # '>B' 1 - TTS disabled 0 - TTS enabled
501 : 'cdetype', # 4 chars (PDOC or EBOK)
502 : 'lastupdatetime',
503 : 'updatedtitle',
}.get(self.type, repr(self.type))
if (self.name in {'coveroffset', 'thumboffset', 'hasfakecover',
'Creator Major Version', 'Creator Minor Version',
'Creator Build Number', 'Creator Software', 'startreading'} or
self.type in {121, 125, 131}):
self.data, = struct.unpack(b'>I', self.data)
def __str__(self):
return '%s (%d): %r'%(self.name, self.type, self.data)
class EXTHHeader(object):
def __init__(self, raw):
self.raw = raw
if not self.raw.startswith(b'EXTH'):
raise ValueError('EXTH header does not start with EXTH')
self.length, = struct.unpack(b'>I', self.raw[4:8])
self.count, = struct.unpack(b'>I', self.raw[8:12])
pos = 12
self.records = []
for i in xrange(self.count):
pos = self.read_record(pos)
self.records.sort(key=lambda x:x.type)
self.rmap = {x.type:x for x in self.records}
def __getitem__(self, type_):
return self.rmap.__getitem__(type_).data
def get(self, type_, default=None):
ans = self.rmap.get(type_, default)
return getattr(ans, 'data', default)
def read_record(self, pos):
type_, length = struct.unpack(b'>II', self.raw[pos:pos+8])
data = self.raw[(pos+8):(pos+length)]
self.records.append(EXTHRecord(type_, data))
return pos + length
@property
def kf8_header_index(self):
return self.get(121, None)
def __str__(self):
ans = ['*'*20 + ' EXTH Header '+ '*'*20]
ans.append('EXTH header length: %d'%self.length)
ans.append('Number of EXTH records: %d'%self.count)
ans.append('EXTH records...')
for r in self.records:
ans.append(str(r))
return '\n'.join(ans)
# }}}
class MOBIHeader(object): # {{{
def __init__(self, record0, offset):
self.raw = record0.raw
self.header_offset = offset
self.compression_raw = self.raw[:2]
self.compression = {1: 'No compression', 2: 'PalmDoc compression',
17480: 'HUFF/CDIC compression'}.get(struct.unpack(b'>H',
self.compression_raw)[0],
repr(self.compression_raw))
self.unused = self.raw[2:4]
self.text_length, = struct.unpack(b'>I', self.raw[4:8])
self.number_of_text_records, self.text_record_size = \
struct.unpack(b'>HH', self.raw[8:12])
self.encryption_type_raw, = struct.unpack(b'>H', self.raw[12:14])
self.encryption_type = {
0: 'No encryption',
1: 'Old mobipocket encryption',
2: 'Mobipocket encryption'
}.get(self.encryption_type_raw, repr(self.encryption_type_raw))
self.unknown = self.raw[14:16]
self.identifier = self.raw[16:20]
if self.identifier != b'MOBI':
raise ValueError('Identifier %r unknown'%self.identifier)
self.length, = struct.unpack(b'>I', self.raw[20:24])
self.type_raw, = struct.unpack(b'>I', self.raw[24:28])
self.type = {
2 : 'Mobipocket book',
3 : 'PalmDOC book',
4 : 'Audio',
257 : 'News',
258 : 'News Feed',
259 : 'News magazine',
513 : 'PICS',
514 : 'Word',
515 : 'XLS',
516 : 'PPT',
517 : 'TEXT',
518 : 'HTML',
}.get(self.type_raw, repr(self.type_raw))
self.encoding_raw, = struct.unpack(b'>I', self.raw[28:32])
self.encoding = {
1252 : 'cp1252',
65001: 'utf-8',
}.get(self.encoding_raw, repr(self.encoding_raw))
self.uid = self.raw[32:36]
self.file_version, = struct.unpack(b'>I', self.raw[36:40])
self.meta_orth_indx, self.meta_infl_indx = struct.unpack(
b'>II', self.raw[40:48])
self.secondary_index_record, = struct.unpack(b'>I', self.raw[48:52])
self.reserved = self.raw[52:80]
self.first_non_book_record, = struct.unpack(b'>I', self.raw[80:84])
self.fullname_offset, = struct.unpack(b'>I', self.raw[84:88])
self.fullname_length, = struct.unpack(b'>I', self.raw[88:92])
self.locale_raw, = struct.unpack(b'>I', self.raw[92:96])
langcode = self.locale_raw
langid = langcode & 0xFF
sublangid = (langcode >> 10) & 0xFF
self.language = main_language.get(langid, 'ENGLISH')
self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
self.input_language = self.raw[96:100]
self.output_langauage = self.raw[100:104]
self.min_version, = struct.unpack(b'>I', self.raw[104:108])
self.first_image_index, = struct.unpack(b'>I', self.raw[108:112])
self.huffman_record_offset, = struct.unpack(b'>I', self.raw[112:116])
self.huffman_record_count, = struct.unpack(b'>I', self.raw[116:120])
self.datp_record_offset, = struct.unpack(b'>I', self.raw[120:124])
self.datp_record_count, = struct.unpack(b'>I', self.raw[124:128])
self.exth_flags, = struct.unpack(b'>I', self.raw[128:132])
self.has_exth = bool(self.exth_flags & 0x40)
self.has_drm_data = self.length >= 174 and len(self.raw) >= 180
if self.has_drm_data:
self.unknown3 = self.raw[132:164]
self.drm_offset, = struct.unpack(b'>I', self.raw[164:168])
self.drm_count, = struct.unpack(b'>I', self.raw[168:172])
self.drm_size, = struct.unpack(b'>I', self.raw[172:176])
self.drm_flags = bin(struct.unpack(b'>I', self.raw[176:180])[0])
self.has_extra_data_flags = self.length >= 232 and len(self.raw) >= 232+16
self.has_fcis_flis = False
self.has_multibytes = self.has_indexing_bytes = self.has_uncrossable_breaks = False
self.extra_data_flags = 0
if self.has_extra_data_flags:
self.unknown4 = self.raw[180:192]
self.fdst_idx, self.fdst_count = struct.unpack_from(b'>II',
self.raw, 192)
(self.fcis_number, self.fcis_count, self.flis_number,
self.flis_count) = struct.unpack(b'>IIII',
self.raw[200:216])
self.unknown6 = self.raw[216:224]
self.srcs_record_index = struct.unpack(b'>I',
self.raw[224:228])[0]
self.num_srcs_records = struct.unpack(b'>I',
self.raw[228:232])[0]
self.unknown7 = self.raw[232:240]
self.extra_data_flags = struct.unpack(b'>I',
self.raw[240:244])[0]
self.has_multibytes = bool(self.extra_data_flags & 0b1)
self.has_indexing_bytes = bool(self.extra_data_flags & 0b10)
self.has_uncrossable_breaks = bool(self.extra_data_flags & 0b100)
self.primary_index_record, = struct.unpack(b'>I',
self.raw[244:248])
if self.file_version >= 8:
(self.sect_idx, self.skel_idx, self.datp_idx, self.oth_idx
) = struct.unpack_from(b'>4L', self.raw, 248)
self.unknown9 = self.raw[264:self.length]
if self.meta_orth_indx != self.sect_idx:
raise ValueError('KF8 header has different Meta orth and '
'section indices')
# The following are all relative to the position of the header record
# make them absolute for ease of debugging
for x in ('sect_idx', 'skel_idx', 'datp_idx', 'oth_idx',
'meta_orth_indx', 'huffman_record_offset',
'first_non_book_record', 'datp_record_offset', 'fcis_number',
'flis_number', 'primary_index_record', 'fdst_idx',
'first_image_index'):
if hasattr(self, x):
setattr(self, x, self.header_offset+getattr(self, x))
if self.has_exth:
self.exth_offset = 16 + self.length
self.exth = EXTHHeader(self.raw[self.exth_offset:])
self.end_of_exth = self.exth_offset + self.exth.length
self.bytes_after_exth = self.raw[self.end_of_exth:self.fullname_offset]
def __str__(self):
ans = ['*'*20 + ' MOBI %d Header '%self.file_version+ '*'*20]
a = ans.append
i = lambda d, x : a('%s (null value: %d): %d'%(d, NULL_INDEX, x))
ans.append('Compression: %s'%self.compression)
ans.append('Unused: %r'%self.unused)
ans.append('Number of text records: %d'%self.number_of_text_records)
ans.append('Text record size: %d'%self.text_record_size)
ans.append('Encryption: %s'%self.encryption_type)
ans.append('Unknown: %r'%self.unknown)
ans.append('Identifier: %r'%self.identifier)
ans.append('Header length: %d'% self.length)
ans.append('Type: %s'%self.type)
ans.append('Encoding: %s'%self.encoding)
ans.append('UID: %r'%self.uid)
ans.append('File version: %d'%self.file_version)
i('Meta Orth Index (Sections index in KF8)', self.meta_orth_indx)
i('Meta Infl Index', self.meta_infl_indx)
ans.append('Secondary index record: %d (null val: %d)'%(
self.secondary_index_record, NULL_INDEX))
ans.append('Reserved: %r'%self.reserved)
ans.append('First non-book record (null value: %d): %d'%(NULL_INDEX,
self.first_non_book_record))
ans.append('Full name offset: %d'%self.fullname_offset)
ans.append('Full name length: %d bytes'%self.fullname_length)
ans.append('Langcode: %r'%self.locale_raw)
ans.append('Language: %s'%self.language)
ans.append('Sub language: %s'%self.sublanguage)
ans.append('Input language: %r'%self.input_language)
ans.append('Output language: %r'%self.output_langauage)
ans.append('Min version: %d'%self.min_version)
ans.append('First Image index: %d'%self.first_image_index)
ans.append('Huffman record offset: %d'%self.huffman_record_offset)
ans.append('Huffman record count: %d'%self.huffman_record_count)
ans.append('DATP record offset: %r'%self.datp_record_offset)
ans.append('DATP record count: %r'%self.datp_record_count)
ans.append('EXTH flags: %s (%s)'%(bin(self.exth_flags)[2:], self.has_exth))
if self.has_drm_data:
ans.append('Unknown3: %r'%self.unknown3)
ans.append('DRM Offset: %s'%self.drm_offset)
ans.append('DRM Count: %s'%self.drm_count)
ans.append('DRM Size: %s'%self.drm_size)
ans.append('DRM Flags: %r'%self.drm_flags)
if self.has_extra_data_flags:
ans.append('Unknown4: %r'%self.unknown4)
ans.append('FDST Index: %d'% self.fdst_idx)
ans.append('FDST Count: %d'% self.fdst_count)
ans.append('FCIS number: %d'% self.fcis_number)
ans.append('FCIS count: %d'% self.fcis_count)
ans.append('FLIS number: %d'% self.flis_number)
ans.append('FLIS count: %d'% self.flis_count)
ans.append('Unknown6: %r'% self.unknown6)
ans.append('SRCS record index: %d'%self.srcs_record_index)
ans.append('Number of SRCS records?: %d'%self.num_srcs_records)
ans.append('Unknown7: %r'%self.unknown7)
ans.append(('Extra data flags: %s (has multibyte: %s) '
'(has indexing: %s) (has uncrossable breaks: %s)')%(
bin(self.extra_data_flags), self.has_multibytes,
self.has_indexing_bytes, self.has_uncrossable_breaks ))
ans.append('Primary index record (null value: %d): %d'%(NULL_INDEX,
self.primary_index_record))
if self.file_version >= 8:
i('Sections Index', self.sect_idx)
i('SKEL Index', self.skel_idx)
i('DATP Index', self.datp_idx)
i('Other Index', self.oth_idx)
if self.unknown9:
a('Unknown9: %r'%self.unknown9)
ans = '\n'.join(ans)
if self.has_exth:
ans += '\n\n' + str(self.exth)
ans += '\n\nBytes after EXTH (%d bytes): %s'%(
len(self.bytes_after_exth),
format_bytes(self.bytes_after_exth))
ans += '\nNumber of bytes after full name: %d' % (len(self.raw) - (self.fullname_offset +
self.fullname_length))
ans += '\nRecord 0 length: %d'%len(self.raw)
return ans
# }}}
class MOBIFile(object):
def __init__(self, stream):
self.raw = stream.read()
self.palmdb = PalmDB(self.raw[:78])
self.record_headers = []
self.records = []
for i in xrange(self.palmdb.number_of_records):
pos = 78 + i * 8
offset, a1, a2, a3, a4 = struct.unpack(b'>LBBBB', self.raw[pos:pos+8])
flags, val = a1, a2 << 16 | a3 << 8 | a4
self.record_headers.append((offset, flags, val))
def section(section_number):
if section_number == self.palmdb.number_of_records - 1:
end_off = len(self.raw)
else:
end_off = self.record_headers[section_number + 1][0]
off = self.record_headers[section_number][0]
return self.raw[off:end_off]
for i in range(self.palmdb.number_of_records):
self.records.append(Record(section(i), self.record_headers[i]))
self.mobi_header = MOBIHeader(self.records[0], 0)
self.huffman_record_nums = []
self.kf8_type = None
mh = mh8 = self.mobi_header
if mh.file_version >= 8:
self.kf8_type = 'standalone'
elif mh.has_exth and mh.exth.kf8_header_index is not None:
self.kf8_type = 'joint'
kf8i = mh.exth.kf8_header_index
mh8 = MOBIHeader(self.records[kf8i], kf8i)
self.mobi8_header = mh8
if 'huff' in self.mobi_header.compression.lower():
from calibre.ebooks.mobi.huffcdic import HuffReader
def huffit(off, cnt):
huffman_record_nums = list(xrange(off, off+cnt))
huffrecs = [self.records[r].raw for r in huffman_record_nums]
huffs = HuffReader(huffrecs)
return huffman_record_nums, huffs.unpack
if self.kf8_type == 'joint':
recs6, d6 = huffit(mh.huffman_record_offset,
mh.huffman_record_count)
recs8, d8 = huffit(mh8.huffman_record_offset,
mh8.huffman_record_count)
self.huffman_record_nums = recs6 + recs8
else:
self.huffman_record_nums, d6 = huffit(mh.huffman_record_offset,
mh.huffman_record_count)
d8 = d6
elif 'palmdoc' in self.mobi_header.compression.lower():
from calibre.ebooks.compression.palmdoc import decompress_doc
d8 = d6 = decompress_doc
else:
d8 = d6 = lambda x: x
self.decompress6, self.decompress8 = d6, d8
class TextRecord(object): # {{{
def __init__(self, idx, record, extra_data_flags, decompress):
self.trailing_data, self.raw = get_trailing_data(record.raw, extra_data_flags)
raw_trailing_bytes = record.raw[len(self.raw):]
self.raw = decompress(self.raw)
if 0 in self.trailing_data:
self.trailing_data['multibyte_overlap'] = self.trailing_data.pop(0)
if 1 in self.trailing_data:
self.trailing_data['indexing'] = self.trailing_data.pop(1)
if 2 in self.trailing_data:
self.trailing_data['uncrossable_breaks'] = self.trailing_data.pop(2)
self.trailing_data['raw_bytes'] = raw_trailing_bytes
for typ, val in self.trailing_data.iteritems():
if isinstance(typ, int):
print ('Record %d has unknown trailing data of type: %d : %r'%
(idx, typ, val))
self.idx = idx
def dump(self, folder):
name = '%06d'%self.idx
with open(os.path.join(folder, name+'.txt'), 'wb') as f:
f.write(self.raw)
with open(os.path.join(folder, name+'.trailing_data'), 'wb') as f:
for k, v in self.trailing_data.iteritems():
raw = '%s : %r\n\n'%(k, v)
f.write(raw.encode('utf-8'))
# }}}

View File

@ -0,0 +1,48 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os, shutil
from calibre.ebooks.mobi.debug.headers import MOBIFile
from calibre.ebooks.mobi.debug.mobi6 import inspect_mobi as inspect_mobi6
from calibre.ebooks.mobi.debug.mobi8 import inspect_mobi as inspect_mobi8
def inspect_mobi(path_or_stream, ddir=None): # {{{
stream = (path_or_stream if hasattr(path_or_stream, 'read') else
open(path_or_stream, 'rb'))
f = MOBIFile(stream)
if ddir is None:
ddir = 'decompiled_' + os.path.splitext(os.path.basename(stream.name))[0]
try:
shutil.rmtree(ddir)
except:
pass
os.makedirs(ddir)
if f.kf8_type is None:
inspect_mobi6(f, ddir)
elif f.kf8_type == 'joint':
p6 = os.path.join(ddir, 'mobi6')
os.mkdir(p6)
inspect_mobi6(f, p6)
p8 = os.path.join(ddir, 'mobi8')
os.mkdir(p8)
inspect_mobi8(f, p8)
else:
inspect_mobi8(f, ddir)
print ('Debug data saved to:', ddir)
# }}}
def main():
inspect_mobi(sys.argv[1])
if __name__ == '__main__':
main()

View File

@ -0,0 +1,839 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct, sys, os
from collections import OrderedDict, defaultdict
from lxml import html
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.reader.index import (parse_index_record,
parse_tagx_section)
from calibre.ebooks.mobi.utils import (decode_hex_number, decint,
decode_tbs, read_font_record)
from calibre.utils.magick.draw import identify_data
from calibre.ebooks.mobi.debug import format_bytes
from calibre.ebooks.mobi.debug.headers import TextRecord
class TagX(object): # {{{
def __init__(self, tag, num_values, bitmask, eof):
self.tag, self.num_values, self.bitmask, self.eof = (tag, num_values,
bitmask, eof)
self.num_of_values = num_values
self.is_eof = (self.eof == 1 and self.tag == 0 and self.num_values == 0
and self.bitmask == 0)
def __repr__(self):
return 'TAGX(tag=%02d, num_values=%d, bitmask=%r, eof=%d)' % (self.tag,
self.num_values, bin(self.bitmask), self.eof)
# }}}
class SecondaryIndexHeader(object): # {{{
def __init__(self, record):
self.record = record
raw = self.record.raw
#open('/t/index_header.bin', 'wb').write(raw)
if raw[:4] != b'INDX':
raise ValueError('Invalid Secondary Index Record')
self.header_length, = struct.unpack('>I', raw[4:8])
self.unknown1 = raw[8:16]
self.index_type, = struct.unpack('>I', raw[16:20])
self.index_type_desc = {0: 'normal', 2:
'inflection', 6: 'calibre'}.get(self.index_type, 'unknown')
self.idxt_start, = struct.unpack('>I', raw[20:24])
self.index_count, = struct.unpack('>I', raw[24:28])
self.index_encoding_num, = struct.unpack('>I', raw[28:32])
self.index_encoding = {65001: 'utf-8', 1252:
'cp1252'}.get(self.index_encoding_num, 'unknown')
if self.index_encoding == 'unknown':
raise ValueError(
'Unknown index encoding: %d'%self.index_encoding_num)
self.unknown2 = raw[32:36]
self.num_index_entries, = struct.unpack('>I', raw[36:40])
self.ordt_start, = struct.unpack('>I', raw[40:44])
self.ligt_start, = struct.unpack('>I', raw[44:48])
self.num_of_ligt_entries, = struct.unpack('>I', raw[48:52])
self.num_of_cncx_blocks, = struct.unpack('>I', raw[52:56])
self.unknown3 = raw[56:180]
self.tagx_offset, = struct.unpack(b'>I', raw[180:184])
if self.tagx_offset != self.header_length:
raise ValueError('TAGX offset and header length disagree')
self.unknown4 = raw[184:self.header_length]
tagx = raw[self.header_length:]
if not tagx.startswith(b'TAGX'):
raise ValueError('Invalid TAGX section')
self.tagx_header_length, = struct.unpack('>I', tagx[4:8])
self.tagx_control_byte_count, = struct.unpack('>I', tagx[8:12])
self.tagx_entries = [TagX(*x) for x in parse_tagx_section(tagx)[1]]
if self.tagx_entries and not self.tagx_entries[-1].is_eof:
raise ValueError('TAGX last entry is not EOF')
idxt0_pos = self.header_length+self.tagx_header_length
num = ord(raw[idxt0_pos])
count_pos = idxt0_pos+1+num
self.last_entry = raw[idxt0_pos+1:count_pos]
self.ncx_count, = struct.unpack(b'>H', raw[count_pos:count_pos+2])
# There may be some alignment zero bytes between the end of the idxt0
# and self.idxt_start
idxt = raw[self.idxt_start:]
if idxt[:4] != b'IDXT':
raise ValueError('Invalid IDXT header')
length_check, = struct.unpack(b'>H', idxt[4:6])
if length_check != self.header_length + self.tagx_header_length:
raise ValueError('Length check failed')
if idxt[6:].replace(b'\0', b''):
raise ValueError('Non null trailing bytes after IDXT')
def __str__(self):
ans = ['*'*20 + ' Secondary Index Header '+ '*'*20]
a = ans.append
def u(w):
a('Unknown: %r (%d bytes) (All zeros: %r)'%(w,
len(w), not bool(w.replace(b'\0', b'')) ))
a('Header length: %d'%self.header_length)
u(self.unknown1)
a('Index Type: %s (%d)'%(self.index_type_desc, self.index_type))
a('Offset to IDXT start: %d'%self.idxt_start)
a('Number of index records: %d'%self.index_count)
a('Index encoding: %s (%d)'%(self.index_encoding,
self.index_encoding_num))
u(self.unknown2)
a('Number of index entries: %d'% self.num_index_entries)
a('ORDT start: %d'%self.ordt_start)
a('LIGT start: %d'%self.ligt_start)
a('Number of LIGT entries: %d'%self.num_of_ligt_entries)
a('Number of cncx blocks: %d'%self.num_of_cncx_blocks)
u(self.unknown3)
a('TAGX offset: %d'%self.tagx_offset)
u(self.unknown4)
a('\n\n')
a('*'*20 + ' TAGX Header (%d bytes)'%self.tagx_header_length+ '*'*20)
a('Header length: %d'%self.tagx_header_length)
a('Control byte count: %d'%self.tagx_control_byte_count)
for i in self.tagx_entries:
a('\t' + repr(i))
a('Index of last IndexEntry in secondary index record: %s'% self.last_entry)
a('Number of entries in the NCX: %d'% self.ncx_count)
return '\n'.join(ans)
# }}}
class IndexHeader(object): # {{{
def __init__(self, record):
self.record = record
raw = self.record.raw
#open('/t/index_header.bin', 'wb').write(raw)
if raw[:4] != b'INDX':
raise ValueError('Invalid Primary Index Record')
self.header_length, = struct.unpack('>I', raw[4:8])
self.unknown1 = raw[8:12]
self.header_type, = struct.unpack('>I', raw[12:16])
self.index_type, = struct.unpack('>I', raw[16:20])
self.index_type_desc = {0: 'normal', 2:
'inflection', 6: 'calibre'}.get(self.index_type, 'unknown')
self.idxt_start, = struct.unpack('>I', raw[20:24])
self.index_count, = struct.unpack('>I', raw[24:28])
self.index_encoding_num, = struct.unpack('>I', raw[28:32])
self.index_encoding = {65001: 'utf-8', 1252:
'cp1252'}.get(self.index_encoding_num, 'unknown')
if self.index_encoding == 'unknown':
raise ValueError(
'Unknown index encoding: %d'%self.index_encoding_num)
self.possibly_language = raw[32:36]
self.num_index_entries, = struct.unpack('>I', raw[36:40])
self.ordt_start, = struct.unpack('>I', raw[40:44])
self.ligt_start, = struct.unpack('>I', raw[44:48])
self.num_of_ligt_entries, = struct.unpack('>I', raw[48:52])
self.num_of_cncx_blocks, = struct.unpack('>I', raw[52:56])
self.unknown2 = raw[56:180]
self.tagx_offset, = struct.unpack(b'>I', raw[180:184])
if self.tagx_offset != self.header_length:
raise ValueError('TAGX offset and header length disagree')
self.unknown3 = raw[184:self.header_length]
tagx = raw[self.header_length:]
if not tagx.startswith(b'TAGX'):
raise ValueError('Invalid TAGX section')
self.tagx_header_length, = struct.unpack('>I', tagx[4:8])
self.tagx_control_byte_count, = struct.unpack('>I', tagx[8:12])
self.tagx_entries = [TagX(*x) for x in parse_tagx_section(tagx)[1]]
if self.tagx_entries and not self.tagx_entries[-1].is_eof:
raise ValueError('TAGX last entry is not EOF')
idxt0_pos = self.header_length+self.tagx_header_length
last_num, consumed = decode_hex_number(raw[idxt0_pos:])
count_pos = idxt0_pos + consumed
self.ncx_count, = struct.unpack(b'>H', raw[count_pos:count_pos+2])
self.last_entry = last_num
if last_num != self.ncx_count - 1:
raise ValueError('Last id number in the NCX != NCX count - 1')
# There may be some alignment zero bytes between the end of the idxt0
# and self.idxt_start
idxt = raw[self.idxt_start:]
if idxt[:4] != b'IDXT':
raise ValueError('Invalid IDXT header')
length_check, = struct.unpack(b'>H', idxt[4:6])
if length_check != self.header_length + self.tagx_header_length:
raise ValueError('Length check failed')
if idxt[6:].replace(b'\0', b''):
raise ValueError('Non null trailing bytes after IDXT')
def __str__(self):
ans = ['*'*20 + ' Index Header (%d bytes)'%len(self.record.raw)+ '*'*20]
a = ans.append
def u(w):
a('Unknown: %r (%d bytes) (All zeros: %r)'%(w,
len(w), not bool(w.replace(b'\0', b'')) ))
a('Header length: %d'%self.header_length)
u(self.unknown1)
a('Header type: %d'%self.header_type)
a('Index Type: %s (%d)'%(self.index_type_desc, self.index_type))
a('Offset to IDXT start: %d'%self.idxt_start)
a('Number of index records: %d'%self.index_count)
a('Index encoding: %s (%d)'%(self.index_encoding,
self.index_encoding_num))
a('Unknown (possibly language?): %r'%(self.possibly_language))
a('Number of index entries: %d'% self.num_index_entries)
a('ORDT start: %d'%self.ordt_start)
a('LIGT start: %d'%self.ligt_start)
a('Number of LIGT entries: %d'%self.num_of_ligt_entries)
a('Number of cncx blocks: %d'%self.num_of_cncx_blocks)
u(self.unknown2)
a('TAGX offset: %d'%self.tagx_offset)
u(self.unknown3)
a('\n\n')
a('*'*20 + ' TAGX Header (%d bytes)'%self.tagx_header_length+ '*'*20)
a('Header length: %d'%self.tagx_header_length)
a('Control byte count: %d'%self.tagx_control_byte_count)
for i in self.tagx_entries:
a('\t' + repr(i))
a('Index of last IndexEntry in primary index record: %s'% self.last_entry)
a('Number of entries in the NCX: %d'% self.ncx_count)
return '\n'.join(ans)
# }}}
class Tag(object): # {{{
'''
Index entries are a collection of tags. Each tag is represented by this
class.
'''
TAG_MAP = {
1: ('offset', 'Offset in HTML'),
2: ('size', 'Size in HTML'),
3: ('label_offset', 'Label offset in CNCX'),
4: ('depth', 'Depth of this entry in TOC'),
5: ('class_offset', 'Class offset in CNCX'),
6: ('pos_fid', 'File Index'),
11: ('secondary', '[unknown, unknown, '
'tag type from TAGX in primary index header]'),
21: ('parent_index', 'Parent'),
22: ('first_child_index', 'First child'),
23: ('last_child_index', 'Last child'),
69 : ('image_index', 'Offset from first image record to the'
' image record associated with this entry'
' (masthead for periodical or thumbnail for'
' article entry).'),
70 : ('desc_offset', 'Description offset in cncx'),
71 : ('author_offset', 'Author offset in cncx'),
72 : ('image_caption_offset', 'Image caption offset in cncx'),
73 : ('image_attr_offset', 'Image attribution offset in cncx'),
}
def __init__(self, tag_type, vals, cncx):
self.value = vals if len(vals) > 1 else vals[0] if vals else None
self.cncx_value = None
if tag_type in self.TAG_MAP:
self.attr, self.desc = self.TAG_MAP[tag_type]
else:
print ('Unknown tag value: %%s'%tag_type)
self.desc = '??Unknown (tag value: %d)'%tag_type
self.attr = 'unknown'
if '_offset' in self.attr:
self.cncx_value = cncx[self.value]
def __str__(self):
if self.cncx_value is not None:
return '%s : %r [%r]'%(self.desc, self.value, self.cncx_value)
return '%s : %r'%(self.desc, self.value)
# }}}
class IndexEntry(object): # {{{
'''
The index is made up of entries, each of which is represented by an
instance of this class. Index entries typically point to offsets in the
HTML, specify HTML sizes and point to text strings in the CNCX that are
used in the navigation UI.
'''
def __init__(self, ident, entry, cncx):
try:
self.index = int(ident, 16)
except ValueError:
self.index = ident
self.tags = [Tag(tag_type, vals, cncx) for tag_type, vals in
entry.iteritems()]
@property
def label(self):
for tag in self.tags:
if tag.attr == 'label_offset':
return tag.cncx_value
return ''
@property
def offset(self):
for tag in self.tags:
if tag.attr == 'offset':
return tag.value
return 0
@property
def size(self):
for tag in self.tags:
if tag.attr == 'size':
return tag.value
return 0
@property
def depth(self):
for tag in self.tags:
if tag.attr == 'depth':
return tag.value
return 0
@property
def parent_index(self):
for tag in self.tags:
if tag.attr == 'parent_index':
return tag.value
return -1
@property
def first_child_index(self):
for tag in self.tags:
if tag.attr == 'first_child_index':
return tag.value
return -1
@property
def last_child_index(self):
for tag in self.tags:
if tag.attr == 'last_child_index':
return tag.value
return -1
@property
def pos_fid(self):
for tag in self.tags:
if tag.attr == 'pos_fid':
return tag.value
return [0, 0]
def __str__(self):
ans = ['Index Entry(index=%s, length=%d)'%(
self.index, len(self.tags))]
for tag in self.tags:
if tag.value is not None:
ans.append('\t'+str(tag))
if self.first_child_index != -1:
ans.append('\tNumber of children: %d'%(self.last_child_index -
self.first_child_index + 1))
return '\n'.join(ans)
# }}}
class IndexRecord(object): # {{{
'''
Represents all indexing information in the MOBI, apart from indexing info
in the trailing data of the text records.
'''
def __init__(self, records, index_header, cncx):
self.alltext = None
table = OrderedDict()
tags = [TagX(x.tag, x.num_values, x.bitmask, x.eof) for x in
index_header.tagx_entries]
for record in records:
raw = record.raw
if raw[:4] != b'INDX':
raise ValueError('Invalid Primary Index Record')
parse_index_record(table, record.raw,
index_header.tagx_control_byte_count, tags,
index_header.index_encoding, strict=True)
self.indices = []
for ident, entry in table.iteritems():
self.indices.append(IndexEntry(ident, entry, cncx))
def get_parent(self, index):
if index.depth < 1:
return None
parent_depth = index.depth - 1
for p in self.indices:
if p.depth != parent_depth:
continue
def __str__(self):
ans = ['*'*20 + ' Index Entries (%d entries) '%len(self.indices)+ '*'*20]
a = ans.append
def u(w):
a('Unknown: %r (%d bytes) (All zeros: %r)'%(w,
len(w), not bool(w.replace(b'\0', b'')) ))
for entry in self.indices:
offset = entry.offset
a(str(entry))
t = self.alltext
if offset is not None and self.alltext is not None:
a('\tHTML before offset: %r'%t[offset-50:offset])
a('\tHTML after offset: %r'%t[offset:offset+50])
p = offset+entry.size
a('\tHTML before end: %r'%t[p-50:p])
a('\tHTML after end: %r'%t[p:p+50])
a('')
return '\n'.join(ans)
# }}}
class CNCX(object): # {{{
'''
Parses the records that contain the compiled NCX (all strings from the
NCX). Presents a simple offset : string mapping interface to access the
data.
'''
def __init__(self, records, codec):
self.records = OrderedDict()
record_offset = 0
for record in records:
raw = record.raw
pos = 0
while pos < len(raw):
length, consumed = decint(raw[pos:])
if length > 0:
try:
self.records[pos+record_offset] = raw[
pos+consumed:pos+consumed+length].decode(codec)
except:
byts = raw[pos:]
r = format_bytes(byts)
print ('CNCX entry at offset %d has unknown format %s'%(
pos+record_offset, r))
self.records[pos+record_offset] = r
pos = len(raw)
pos += consumed+length
record_offset += 0x10000
def __getitem__(self, offset):
return self.records.get(offset)
def __str__(self):
ans = ['*'*20 + ' cncx (%d strings) '%len(self.records)+ '*'*20]
for k, v in self.records.iteritems():
ans.append('%10d : %s'%(k, v))
return '\n'.join(ans)
# }}}
class ImageRecord(object): # {{{
def __init__(self, idx, record, fmt):
self.raw = record.raw
self.fmt = fmt
self.idx = idx
def dump(self, folder):
name = '%06d'%self.idx
with open(os.path.join(folder, name+'.'+self.fmt), 'wb') as f:
f.write(self.raw)
# }}}
class BinaryRecord(object): # {{{
def __init__(self, idx, record):
self.raw = record.raw
sig = self.raw[:4]
name = '%06d'%idx
if sig in {b'FCIS', b'FLIS', b'SRCS', b'DATP', b'RESC', b'BOUN',
b'FDST', b'AUDI', b'VIDE',}:
name += '-' + sig.decode('ascii')
elif sig == b'\xe9\x8e\r\n':
name += '-' + 'EOF'
self.name = name
def dump(self, folder):
with open(os.path.join(folder, self.name+'.bin'), 'wb') as f:
f.write(self.raw)
# }}}
class FontRecord(object): # {{{
def __init__(self, idx, record):
self.raw = record.raw
name = '%06d'%idx
self.font = read_font_record(self.raw)
if self.font['err']:
raise ValueError('Failed to read font record: %s Headers: %s'%(
self.font['err'], self.font['headers']))
self.payload = (self.font['font_data'] if self.font['font_data'] else
self.font['raw_data'])
self.name = '%s.%s'%(name, self.font['ext'])
def dump(self, folder):
with open(os.path.join(folder, self.name), 'wb') as f:
f.write(self.payload)
# }}}
class TBSIndexing(object): # {{{
def __init__(self, text_records, indices, doc_type):
self.record_indices = OrderedDict()
self.doc_type = doc_type
self.indices = indices
pos = 0
for r in text_records:
start = pos
pos += len(r.raw)
end = pos - 1
self.record_indices[r] = x = {'starts':[], 'ends':[],
'complete':[], 'geom': (start, end)}
for entry in indices:
istart, sz = entry.offset, entry.size
iend = istart + sz - 1
has_start = istart >= start and istart <= end
has_end = iend >= start and iend <= end
rec = None
if has_start and has_end:
rec = 'complete'
elif has_start and not has_end:
rec = 'starts'
elif not has_start and has_end:
rec = 'ends'
if rec:
x[rec].append(entry)
def get_index(self, idx):
for i in self.indices:
if i.index in {idx, unicode(idx)}: return i
raise IndexError('Index %d not found'%idx)
def __str__(self):
ans = ['*'*20 + ' TBS Indexing (%d records) '%len(self.record_indices)+ '*'*20]
for r, dat in self.record_indices.iteritems():
ans += self.dump_record(r, dat)[-1]
return '\n'.join(ans)
def dump(self, bdir):
types = defaultdict(list)
for r, dat in self.record_indices.iteritems():
tbs_type, strings = self.dump_record(r, dat)
if tbs_type == 0: continue
types[tbs_type] += strings
for typ, strings in types.iteritems():
with open(os.path.join(bdir, 'tbs_type_%d.txt'%typ), 'wb') as f:
f.write('\n'.join(strings))
def dump_record(self, r, dat):
ans = []
ans.append('\nRecord #%d: Starts at: %d Ends at: %d'%(r.idx,
dat['geom'][0], dat['geom'][1]))
s, e, c = dat['starts'], dat['ends'], dat['complete']
ans.append(('\tContains: %d index entries '
'(%d ends, %d complete, %d starts)')%tuple(map(len, (s+e+c, e,
c, s))))
byts = bytearray(r.trailing_data.get('indexing', b''))
ans.append('TBS bytes: %s'%format_bytes(byts))
for typ, entries in (('Ends', e), ('Complete', c), ('Starts', s)):
if entries:
ans.append('\t%s:'%typ)
for x in entries:
ans.append(('\t\tIndex Entry: %s (Parent index: %s, '
'Depth: %d, Offset: %d, Size: %d) [%s]')%(
x.index, x.parent_index, x.depth, x.offset, x.size, x.label))
def bin4(num):
ans = bin(num)[2:]
return bytes('0'*(4-len(ans)) + ans)
def repr_extra(x):
return str({bin4(k):v for k, v in extra.iteritems()})
tbs_type = 0
is_periodical = self.doc_type in (257, 258, 259)
if len(byts):
outermost_index, extra, consumed = decode_tbs(byts, flag_size=3)
byts = byts[consumed:]
for k in extra:
tbs_type |= k
ans.append('\nTBS: %d (%s)'%(tbs_type, bin4(tbs_type)))
ans.append('Outermost index: %d'%outermost_index)
ans.append('Unknown extra start bytes: %s'%repr_extra(extra))
if is_periodical: # Hierarchical periodical
try:
byts, a = self.interpret_periodical(tbs_type, byts,
dat['geom'][0])
except:
import traceback
traceback.print_exc()
a = []
print ('Failed to decode TBS bytes for record: %d'%r.idx)
ans += a
if byts:
sbyts = tuple(hex(b)[2:] for b in byts)
ans.append('Remaining bytes: %s'%' '.join(sbyts))
ans.append('')
return tbs_type, ans
def interpret_periodical(self, tbs_type, byts, record_offset):
ans = []
def read_section_transitions(byts, psi=None): # {{{
if psi is None:
# Assume previous section is 1
psi = self.get_index(1)
while byts:
ai, extra, consumed = decode_tbs(byts)
byts = byts[consumed:]
if extra.get(0b0010, None) is not None:
raise ValueError('Dont know how to interpret flag 0b0010'
' while reading section transitions')
if extra.get(0b1000, None) is not None:
if len(extra) > 1:
raise ValueError('Dont know how to interpret flags'
' %r while reading section transitions'%extra)
nsi = self.get_index(psi.index+1)
ans.append('Last article in this record of section %d'
' (relative to next section index [%d]): '
'%d [%d absolute index]'%(psi.index, nsi.index, ai,
ai+nsi.index))
psi = nsi
continue
ans.append('First article in this record of section %d'
' (relative to its parent section): '
'%d [%d absolute index]'%(psi.index, ai, ai+psi.index))
num = extra.get(0b0100, None)
if num is None:
msg = ('The section %d has at most one article'
' in this record')%psi.index
else:
msg = ('Number of articles in this record of '
'section %d: %d')%(psi.index, num)
ans.append(msg)
offset = extra.get(0b0001, None)
if offset is not None:
if offset == 0:
ans.append('This record is spanned by the article:'
'%d'%(ai+psi.index))
else:
ans.append('->Offset to start of next section (%d) from start'
' of record: %d [%d absolute offset]'%(psi.index+1,
offset, offset+record_offset))
return byts
# }}}
def read_starting_section(byts): # {{{
orig = byts
si, extra, consumed = decode_tbs(byts)
byts = byts[consumed:]
if len(extra) > 1 or 0b0010 in extra or 0b1000 in extra:
raise ValueError('Dont know how to interpret flags %r'
' when reading starting section'%extra)
si = self.get_index(si)
ans.append('The section at the start of this record is:'
' %s'%si.index)
if 0b0100 in extra:
num = extra[0b0100]
ans.append('The number of articles from the section %d'
' in this record: %s'%(si.index, num))
elif 0b0001 in extra:
eof = extra[0b0001]
if eof != 0:
raise ValueError('Unknown eof value %s when reading'
' starting section. All bytes: %r'%(eof, orig))
ans.append('??This record has more than one article from '
' the section: %s'%si.index)
return si, byts
# }}}
if tbs_type & 0b0100:
# Starting section is the first section
ssi = self.get_index(1)
else:
ssi, byts = read_starting_section(byts)
byts = read_section_transitions(byts, ssi)
return byts, ans
# }}}
class MOBIFile(object): # {{{
def __init__(self, mf):
for x in ('raw', 'palmdb', 'record_headers', 'records', 'mobi_header',
'huffman_record_nums',):
setattr(self, x, getattr(mf, x))
self.index_header = self.index_record = None
self.indexing_record_nums = set()
pir = self.mobi_header.primary_index_record
if pir != NULL_INDEX:
self.index_header = IndexHeader(self.records[pir])
numi = self.index_header.index_count
self.cncx = CNCX(self.records[
pir+1+numi:pir+1+numi+self.index_header.num_of_cncx_blocks],
self.index_header.index_encoding)
self.index_record = IndexRecord(self.records[pir+1:pir+1+numi],
self.index_header, self.cncx)
self.indexing_record_nums = set(xrange(pir,
pir+1+numi+self.index_header.num_of_cncx_blocks))
self.secondary_index_record = self.secondary_index_header = None
sir = self.mobi_header.secondary_index_record
if sir != NULL_INDEX:
self.secondary_index_header = SecondaryIndexHeader(self.records[sir])
numi = self.secondary_index_header.index_count
self.indexing_record_nums.add(sir)
self.secondary_index_record = IndexRecord(
self.records[sir+1:sir+1+numi], self.secondary_index_header, self.cncx)
self.indexing_record_nums |= set(xrange(sir+1, sir+1+numi))
ntr = self.mobi_header.number_of_text_records
fntbr = self.mobi_header.first_non_book_record
fii = self.mobi_header.first_image_index
if fntbr == NULL_INDEX:
fntbr = len(self.records)
self.text_records = [TextRecord(r, self.records[r],
self.mobi_header.extra_data_flags, mf.decompress6) for r in xrange(1,
min(len(self.records), ntr+1))]
self.image_records, self.binary_records = [], []
self.font_records = []
image_index = 0
for i in xrange(fntbr, len(self.records)):
if i in self.indexing_record_nums or i in self.huffman_record_nums:
continue
image_index += 1
r = self.records[i]
fmt = None
if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
b'AUDI', b'VIDE', b'FONT'}:
try:
width, height, fmt = identify_data(r.raw)
except:
pass
if fmt is not None:
self.image_records.append(ImageRecord(image_index, r, fmt))
elif r.raw[:4] == b'FONT':
self.font_records.append(FontRecord(i, r))
else:
self.binary_records.append(BinaryRecord(i, r))
if self.index_record is not None:
self.tbs_indexing = TBSIndexing(self.text_records,
self.index_record.indices, self.mobi_header.type_raw)
def print_header(self, f=sys.stdout):
print (str(self.palmdb).encode('utf-8'), file=f)
print (file=f)
print ('Record headers:', file=f)
for i, r in enumerate(self.records):
print ('%6d. %s'%(i, r.header), file=f)
print (file=f)
print (str(self.mobi_header).encode('utf-8'), file=f)
# }}}
def inspect_mobi(mobi_file, ddir):
f = MOBIFile(mobi_file)
with open(os.path.join(ddir, 'header.txt'), 'wb') as out:
f.print_header(f=out)
alltext = os.path.join(ddir, 'text.html')
with open(alltext, 'wb') as of:
alltext = b''
for rec in f.text_records:
of.write(rec.raw)
alltext += rec.raw
of.seek(0)
root = html.fromstring(alltext.decode('utf-8'))
with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
include_meta_content_type=True))
if f.index_header is not None:
f.index_record.alltext = alltext
with open(os.path.join(ddir, 'index.txt'), 'wb') as out:
print(str(f.index_header), file=out)
print('\n\n', file=out)
if f.secondary_index_header is not None:
print(str(f.secondary_index_header).encode('utf-8'), file=out)
print('\n\n', file=out)
if f.secondary_index_record is not None:
print(str(f.secondary_index_record).encode('utf-8'), file=out)
print('\n\n', file=out)
print(str(f.cncx).encode('utf-8'), file=out)
print('\n\n', file=out)
print(str(f.index_record), file=out)
with open(os.path.join(ddir, 'tbs_indexing.txt'), 'wb') as out:
print(str(f.tbs_indexing), file=out)
f.tbs_indexing.dump(ddir)
for tdir, attr in [('text', 'text_records'), ('images', 'image_records'),
('binary', 'binary_records'), ('font', 'font_records')]:
tdir = os.path.join(ddir, tdir)
os.mkdir(tdir)
for rec in getattr(f, attr):
rec.dump(tdir)
# }}}

View File

@ -0,0 +1,62 @@
#!/usr/bin/env python
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import (unicode_literals, division, absolute_import,
print_function)
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os
from calibre.ebooks.mobi.debug.headers import TextRecord
class MOBIFile(object):
def __init__(self, mf):
self.mf = mf
h, h8 = mf.mobi_header, mf.mobi8_header
first_text_record = 1
offset = 0
res_end = len(mf.records)
if mf.kf8_type == 'joint':
offset = h.exth.kf8_header_index
res_end = offset - 1
self.resource_records = mf.records[h.first_non_book_record:res_end]
self.text_records = [TextRecord(i, r, h8.extra_data_flags,
mf.decompress8) for i, r in
enumerate(mf.records[first_text_record+offset:
first_text_record+offset+h8.number_of_text_records])]
self.raw_text = b''.join(r.raw for r in self.text_records)
def print_header(self, f=sys.stdout):
print (str(self.mf.palmdb).encode('utf-8'), file=f)
print (file=f)
print ('Record headers:', file=f)
for i, r in enumerate(self.mf.records):
print ('%6d. %s'%(i, r.header), file=f)
print (file=f)
print (str(self.mf.mobi8_header).encode('utf-8'), file=f)
def inspect_mobi(mobi_file, ddir):
f = MOBIFile(mobi_file)
with open(os.path.join(ddir, 'header.txt'), 'wb') as out:
f.print_header(f=out)
alltext = os.path.join(ddir, 'raw_text.html')
with open(alltext, 'wb') as of:
of.write(f.raw_text)
for tdir, attr in [('text_records', 'text_records'), ('images',
'image_records'), ('binary', 'binary_records'), ('font',
'font_records')]:
tdir = os.path.join(ddir, tdir)
os.mkdir(tdir)
for rec in getattr(f, attr, []):
rec.dump(tdir)

View File

@ -186,20 +186,16 @@ class BookHeader(object):
if len(raw) >= 0xF8:
self.ncxidx, = struct.unpack_from(b'>L', raw, 0xF4)
if self.mobi_version >= 8:
self.skelidx, = struct.unpack_from('>L', raw, 0xFC)
# Index into <div> sections in raw_ml
self.dividx, = struct.unpack_from('>L', raw, 0xF8)
# Index into Other files
self.othidx, = struct.unpack_from('>L', raw, 0x104)
# Ancient PRC files from Baen can have random values for
# mobi_version, so be conservative
if self.mobi_version == 8 and len(raw) >= (0xF8 + 16):
self.dividx, self.skelidx, self.datpidx, self.othidx = \
struct.unpack_from(b'>4L', raw, 0xF8)
# need to use the FDST record to find out how to properly
# unpack the raw_ml into pieces it is simply a table of start
# and end locations for each flow piece
self.fdstidx, = struct.unpack_from('>L', raw, 0xC0)
self.fdstcnt, = struct.unpack_from('>L', raw, 0xC4)
self.fdstidx, self.fdstcnt = struct.unpack_from(b'>2L', raw, 0xC0)
# if cnt is 1 or less, fdst section number can be garbage
if self.fdstcnt <= 1:
self.fdstidx = NULL_INDEX

View File

@ -8,9 +8,13 @@ __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct
from collections import OrderedDict
from collections import OrderedDict, namedtuple
from calibre.ebooks.mobi.utils import decint, count_set_bits
from calibre.ebooks.mobi.utils import (decint, count_set_bits,
decode_string)
TagX = namedtuple('TagX', 'tag num_of_values bitmask eof')
PTagX = namedtuple('PTagX', 'tag value_count value_bytes num_of_values')
class InvalidFile(ValueError):
pass
@ -37,9 +41,8 @@ def parse_indx_header(data):
'lng', 'total', 'ordt', 'ligt', 'nligt', 'ncncx'
)
num = len(words)
values = struct.unpack(b'>%dL' % num, data[4:4*(num+1)])
header = {words[i]:values[i] for i in xrange(num)}
return header
values = struct.unpack(bytes('>%dL' % num), data[4:4*(num+1)])
return dict(zip(words, values))
class CNCX(object): # {{{
@ -77,101 +80,94 @@ class CNCX(object): # {{{
return self.records.get(offset, default)
# }}}
def parse_tag_section(data):
def parse_tagx_section(data):
check_signature(data, b'TAGX')
tags = []
first_entry_offset, = struct.unpack_from(b'>L', data, 0x04)
control_byte_count, = struct.unpack_from(b'>L', data, 0x08)
first_entry_offset, = struct.unpack_from(b'>L', data, 4)
control_byte_count, = struct.unpack_from(b'>L', data, 8)
# Skip the first 12 bytes already read above.
for i in xrange(12, first_entry_offset, 4):
pos = i
tags.append((ord(data[pos]), ord(data[pos+1]), ord(data[pos+2]),
ord(data[pos+3])))
vals = list(bytearray(data[i:i+4]))
tags.append(TagX(*vals))
return control_byte_count, tags
def get_tag_map(control_byte_count, tags, data, start, end):
def get_tag_map(control_byte_count, tagx, data, strict=False):
ptags = []
ans = {}
control_byte_index = 0
data_start = start + control_byte_count
control_bytes = list(bytearray(data[:control_byte_count]))
data = data[control_byte_count:]
for tag, values_per_entry, mask, end_flag in tags:
if end_flag == 0x01:
control_byte_index += 1
for x in tagx:
if x.eof == 0x01:
control_bytes = control_bytes[1:]
continue
value = ord(data[start + control_byte_index]) & mask
value = control_bytes[0] & x.bitmask
if value != 0:
if value == mask:
if count_set_bits(mask) > 1:
value_count = value_bytes = None
if value == x.bitmask:
if count_set_bits(x.bitmask) > 1:
# If all bits of masked value are set and the mask has more
# than one bit, a variable width value will follow after
# the control bytes which defines the length of bytes (NOT
# the value count!) which will contain the corresponding
# variable width values.
value, consumed = decint(data[data_start:])
data_start += consumed
ptags.append((tag, None, value, values_per_entry))
value_bytes, consumed = decint(data)
data = data[consumed:]
else:
ptags.append((tag, 1, None, values_per_entry))
value_count = 1
else:
# Shift bits to get the masked value.
while mask & 0x01 == 0:
mask = mask >> 1
value = value >> 1
ptags.append((tag, value, None, values_per_entry))
for tag, value_count, value_bytes, values_per_entry in ptags:
mask = x.bitmask
while mask & 0b1 == 0:
mask >>= 1
value >>= 1
value_count = value
ptags.append(PTagX(x.tag, value_count, value_bytes,
x.num_of_values))
for x in ptags:
values = []
if value_count != None:
if x.value_count is not None:
# Read value_count * values_per_entry variable width values.
for _ in xrange(value_count*values_per_entry):
byts, consumed = decint(data[data_start:])
data_start += consumed
for _ in xrange(x.value_count * x.num_of_values):
byts, consumed = decint(data)
data = data[consumed:]
values.append(byts)
else:
else: # value_bytes is not None
# Convert value_bytes to variable width values.
total_consumed = 0
while total_consumed < value_bytes:
while total_consumed < x.value_bytes:
# Does this work for values_per_entry != 1?
byts, consumed = decint(data[data_start:])
data_start += consumed
byts, consumed = decint(data)
data = data[consumed:]
total_consumed += consumed
values.append(byts)
if total_consumed != value_bytes:
print ("Error: Should consume %s bytes, but consumed %s" %
(value_bytes, total_consumed))
ans[tag] = values
# Test that all bytes have been processed if end is given.
if end is not None and data_start < end:
# The last entry might have some zero padding bytes, so complain only if non zero bytes are left.
rest = data[data_start:end]
if rest.replace(b'\0', b''):
print ("Warning: There are unprocessed index bytes left: %s" %
format_bytes(rest))
if total_consumed != x.value_bytes:
err = ("Error: Should consume %s bytes, but consumed %s" %
(x.value_bytes, total_consumed))
if strict:
raise ValueError(err)
else:
print(err)
ans[x.tag] = values
# Test that all bytes have been processed
if data.replace(b'\0', b''):
err = ("Warning: There are unprocessed index bytes left: %s" %
format_bytes(data))
if strict:
raise ValueError(err)
else:
print(err)
return ans
def read_index(sections, idx, codec):
table, cncx = OrderedDict(), CNCX([], codec)
data = sections[idx][0]
indx_header = parse_indx_header(data)
indx_count = indx_header['count']
if indx_header['ncncx'] > 0:
off = idx + indx_count + 1
cncx_records = [x[0] for x in sections[off:off+indx_header['ncncx']]]
cncx = CNCX(cncx_records, codec)
tag_section_start = indx_header['len']
control_byte_count, tags = parse_tag_section(data[tag_section_start:])
for i in xrange(idx + 1, idx + 1 + indx_count):
data = sections[i][0]
def parse_index_record(table, data, control_byte_count, tags, codec,
strict=False):
header = parse_indx_header(data)
idxt_pos = header['start']
if data[idxt_pos:idxt_pos+4] != b'IDXT':
print ('WARNING: Invalid INDX record')
entry_count = header['count']
# loop through to build up the IDXT position starts
@ -187,11 +183,32 @@ def read_index(sections, idx, codec):
# text
for j in xrange(entry_count):
start, end = idx_positions[j:j+2]
text_length = ord(data[start])
text = data[start+1:start+1+text_length]
tag_map = get_tag_map(control_byte_count, tags, data,
start+1+text_length, end)
table[text] = tag_map
rec = data[start:end]
ident, consumed = decode_string(rec, codec=codec)
rec = rec[consumed:]
tag_map = get_tag_map(control_byte_count, tags, rec, strict=strict)
table[ident] = tag_map
def read_index(sections, idx, codec):
table, cncx = OrderedDict(), CNCX([], codec)
data = sections[idx][0]
indx_header = parse_indx_header(data)
indx_count = indx_header['count']
if indx_header['ncncx'] > 0:
off = idx + indx_count + 1
cncx_records = [x[0] for x in sections[off:off+indx_header['ncncx']]]
cncx = CNCX(cncx_records, codec)
tag_section_start = indx_header['len']
control_byte_count, tags = parse_tagx_section(data[tag_section_start:])
for i in xrange(idx + 1, idx + 1 + indx_count):
# Index record
data = sections[i][0]
parse_index_record(table, data, control_byte_count, tags, codec)
return table, cncx

View File

@ -33,9 +33,11 @@ def update_internal_links(mobi8_reader):
for m in posfid_index_pattern.finditer(tag):
posfid = m.group(1)
offset = m.group(2)
filename, idtag = mr.get_id_tag_by_pos_fid(posfid, offset)
filename, idtag = mr.get_id_tag_by_pos_fid(int(posfid, 32),
int(offset, 32))
suffix = (b'#' + idtag) if idtag else b''
replacement = filename.encode(mr.header.codec) + suffix
replacement = filename.split('/')[-1].encode(
mr.header.codec) + suffix
tag = posfid_index_pattern.sub(replacement, tag, 1)
srcpieces[j] = tag
part = ''.join([x.decode(mr.header.codec) for x in srcpieces])

View File

@ -107,7 +107,10 @@ class MobiReader(object):
self.kf8_type = None
k8i = getattr(self.book_header.exth, 'kf8_header', None)
if self.book_header.mobi_version == 8:
# Ancient PRC files from Baen can have random values for
# mobi_version, so be conservative
if (self.book_header.mobi_version == 8 and hasattr(self.book_header,
'skelidx')):
self.kf8_type = 'standalone'
elif k8i is not None: # Check for joint mobi 6 and kf 8 file
try:
@ -118,12 +121,17 @@ class MobiReader(object):
try:
self.book_header = BookHeader(self.sections[k8i][0],
self.ident, user_encoding, self.log)
# The following are only correct in the Mobi 6
# header not the Mobi 8 header
# Only the first_image_index from the MOBI 6 header is
# useful
for x in ('first_image_index',):
setattr(self.book_header, x, getattr(bh, x))
# We need to do this because the MOBI 6 text extract code
# does not know anything about the kf8 offset
if hasattr(self.book_header, 'huff_offset'):
self.book_header.huff_offset += k8i
self.kf8_type = 'joint'
self.kf8_boundary = k8i-1
except:

View File

@ -33,6 +33,7 @@ class Mobi8Reader(object):
def __init__(self, mobi6_reader, log):
self.mobi6_reader, self.log = mobi6_reader, log
self.header = mobi6_reader.book_header
self.encrypted_fonts = []
def __call__(self):
self.mobi6_reader.check_for_drm()
@ -229,11 +230,9 @@ class Mobi8Reader(object):
def get_id_tag_by_pos_fid(self, posfid, offset):
# first convert kindle:pos:fid and offset info to position in file
row = int(posfid, 32)
off = int(offset, 32)
[insertpos, idtext, filenum, seqnm, startpos, length] = self.elems[row]
pos = insertpos + off
fname = self.get_file_info(pos).filename
insertpos, idtext, filenum, seqnm, startpos, length = self.elems[posfid]
pos = insertpos + offset
fi = self.get_file_info(pos)
# an existing "id=" must exist in original xhtml otherwise it would not
# have worked for linking. Amazon seems to have added its own
# additional "aid=" inside tags whose contents seem to represent some
@ -242,7 +241,7 @@ class Mobi8Reader(object):
# so find the closest "id=" before position the file by actually
# searching in that file
idtext = self.get_id_tag(pos)
return fname, idtext
return '%s/%s'%(fi.type, fi.filename), idtext
def get_id_tag(self, pos):
# find the correct tag by actually searching in the destination
@ -253,12 +252,13 @@ class Mobi8Reader(object):
textblock = self.parts[fi.num]
id_map = []
npos = pos - fi.start
# if npos inside a tag then search all text before the its end of tag
# marker
pgt = textblock.find(b'>', npos)
plt = textblock.find(b'<', npos)
if pgt < plt:
# if npos inside a tag then search all text before the its end of tag marker
# else not in a tag need to search the preceding tag
if plt == npos or pgt < plt:
npos = pgt + 1
textblock = textblock[0:npos]
# find id links only inside of tags
# inside any < > pair find all "id=' and return whatever is inside
# the quotes
@ -315,12 +315,18 @@ class Mobi8Reader(object):
# Add href and anchor info to the index entries
for entry in index_entries:
pos_fid = entry['pos_fid']
if pos_fid is None:
pos = entry['pos']
fi = self.get_file_info(pos)
if fi.filename is None:
raise ValueError('Index entry has invalid pos: %d'%pos)
idtag = self.get_id_tag(pos).decode(self.header.codec)
entry['href'] = '%s/%s'%(fi.type, fi.filename)
href = '%s/%s'%(fi.type, fi.filename)
else:
href, idtag = self.get_id_tag_by_pos_fid(*pos_fid)
entry['href'] = href
entry['idtag'] = idtag
# Build the TOC object
@ -350,6 +356,8 @@ class Mobi8Reader(object):
with open(href.replace('/', os.sep), 'wb') as f:
f.write(font['font_data'] if font['font_data'] else
font['raw_data'])
if font['encrypted']:
self.encrypted_fonts.append(href)
else:
imgtype = imghdr.what(None, data)
if imgtype is None:

View File

@ -10,7 +10,6 @@ __docformat__ = 'restructuredtext en'
import os
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.mobi.utils import to_base
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.reader.index import read_index
@ -23,7 +22,30 @@ tag_fieldname_map = {
6: ['pos_fid',0],
21: ['parent',0],
22: ['child1',0],
23: ['childn',0]
23: ['childn',0],
69: ['image_index',0],
70 : ['desc_offset', 0], # 'Description offset in cncx'
71 : ['author_offset', 0], # 'Author offset in cncx'
72 : ['image_caption_offset', 0], # 'Image caption offset in cncx',
73 : ['image_attr_offset', 0], # 'Image attribution offset in cncx',
}
default_entry = {
'pos': -1,
'len': 0,
'noffs': -1,
'text' : "Unknown Text",
'hlvl' : -1,
'kind' : "Unknown Class",
'pos_fid' : None,
'parent' : -1,
'child1' : -1,
'childn' : -1,
'description': None,
'author': None,
'image_caption': None,
'image_attribution': None,
}
def read_ncx(sections, index, codec):
@ -34,32 +56,25 @@ def read_ncx(sections, index, codec):
for num, x in enumerate(table.iteritems()):
text, tag_map = x
entry = {
'name': text,
'pos': -1,
'len': 0,
'noffs': -1,
'text' : "Unknown Text",
'hlvl' : -1,
'kind' : "Unknown Kind",
'pos_fid' : None,
'parent' : -1,
'child1' : -1,
'childn' : -1,
'num' : num
}
entry = default_entry.copy()
entry['name'] = text
entry['num'] = num
for tag in tag_fieldname_map.keys():
for tag in tag_fieldname_map.iterkeys():
fieldname, i = tag_fieldname_map[tag]
if tag in tag_map:
fieldvalue = tag_map[tag][i]
if tag == 6:
fieldvalue = to_base(fieldvalue, base=32)
# Appears to be an idx into the KF8 elems table with an
# offset
fieldvalue = tuple(tag_map[tag])
entry[fieldname] = fieldvalue
if tag == 3:
entry['text'] = cncx.get(fieldvalue, 'Unknown Text')
if tag == 5:
entry['kind'] = cncx.get(fieldvalue, 'Unknown Kind')
for which, name in {3:'text', 5:'kind', 70:'description',
71:'author', 72:'image_caption',
73:'image_attribution'}.iteritems():
if tag == which:
entry[name] = cncx.get(fieldvalue,
default_entry[name])
index_entries.append(entry)
return index_entries

View File

@ -15,7 +15,13 @@ from calibre.ebooks import normalize
IMAGE_MAX_SIZE = 10 * 1024 * 1024
def decode_hex_number(raw):
def decode_string(raw, codec='utf-8'):
length, = struct.unpack(b'>B', raw[0])
raw = raw[1:1+length]
consumed = length+1
return raw.decode(codec), consumed
def decode_hex_number(raw, codec='utf-8'):
'''
Return a variable length number encoded using hexadecimal encoding. These
numbers have the first byte which tells the number of bytes that follow.
@ -25,13 +31,16 @@ def decode_hex_number(raw):
:param raw: Raw binary data as a bytestring
:return: The number and the number of bytes from raw that the number
occupies
occupies.
'''
length, = struct.unpack(b'>B', raw[0])
raw = raw[1:1+length]
consumed = length+1
raw, consumed = decode_string(raw, codec=codec)
return int(raw, 16), consumed
def encode_string(raw):
ans = bytearray(bytes(raw))
ans.insert(0, len(ans))
return bytes(ans)
def encode_number_as_hex(num):
'''
Encode num as a variable length encoded hexadecimal number. Returns the
@ -44,9 +53,7 @@ def encode_number_as_hex(num):
nlen = len(num)
if nlen % 2 != 0:
num = b'0'+num
ans = bytearray(num)
ans.insert(0, len(num))
return bytes(ans)
return encode_string(num)
def encint(value, forward=True):
'''
@ -430,7 +437,7 @@ def read_font_record(data, extent=1040): # {{{
# The zlib compressed data begins with 2 bytes of header and
# has 4 bytes of checksum at the end
ans = {'raw_data':data, 'font_data':None, 'err':None, 'ext':'failed',
'headers':None}
'headers':None, 'encrypted':False}
try:
usize, flags, dstart, xor_len, xor_start = struct.unpack_from(
@ -453,6 +460,7 @@ def read_font_record(data, extent=1040): # {{{
buf[n] ^= key[n%xor_len] # XOR of buf and key
font_data = bytes(buf)
ans['encrypted'] = True
if flags & 0b1:
# ZLIB compressed data

View File

@ -234,6 +234,8 @@ class RTFMLizer(object):
# Process tags that need special processing and that do not have inner
# text. Usually these require an argument
if tag == 'img':
src = elem.get('src')
if src:
src = os.path.basename(elem.get('src'))
block_start = ''
block_end = ''

View File

@ -70,6 +70,9 @@ class AddAction(InterfaceAction):
self.add_menu.addSeparator()
ma('add-formats', _('Add files to selected book records'),
triggered=self.add_formats, shortcut=_('Shift+A'))
self.add_menu.addSeparator()
ma('add-config', _('Configure the adding of books'),
triggered=self.add_config)
self.qaction.triggered.connect(self.add_books)
@ -78,6 +81,11 @@ class AddAction(InterfaceAction):
for action in list(self.add_menu.actions())[1:]:
action.setEnabled(enabled)
def add_config(self):
self.gui.iactions['Preferences'].do_config(
initial_plugin=('Import/Export', 'Adding'),
close_after_initial=True)
def add_formats(self, *args):
if self.gui.stack.currentIndex() != 0:
return

View File

@ -3,7 +3,7 @@
from __future__ import (unicode_literals, division, absolute_import, print_function)
__license__ = 'GPL 3'
__copyright__ = '2011, Tomasz Długosz <tomek3d@gmail.com>'
__copyright__ = '2011-2012, Tomasz Długosz <tomek3d@gmail.com>'
__docformat__ = 'restructuredtext en'
import re
@ -47,9 +47,12 @@ class NextoStore(BasicStoreConfig, StorePlugin):
url = 'http://www.nexto.pl/szukaj.xml?search-clause=' + urllib.quote_plus(query) + '&scid=1015'
br = browser()
offset=0
counter = max_results
with closing(br.open(url, timeout=timeout)) as f:
while counter:
with closing(br.open(url + '&_offset=' + str(offset), timeout=timeout)) as f:
doc = html.fromstring(f.read())
for data in doc.xpath('//ul[@class="productslist"]/li'):
if counter <= 0:
@ -85,3 +88,6 @@ class NextoStore(BasicStoreConfig, StorePlugin):
s.formats = formats.upper().strip()
yield s
if not doc.xpath('//div[@class="listnavigator"]//a[@class="next"]'):
break
offset+=10

View File

@ -255,7 +255,10 @@
</widget>
</item>
<item row="3" column="1">
<widget class="QSpinBox" name="max_view_width">
<widget class="QSpinBox" name="max_fs_width">
<property name="toolTip">
<string>Set the maximum width that the book's text and pictures will take when in fullscreen mode. This allows you to read the book text without it becoming too wide.</string>
</property>
<property name="suffix">
<string> px</string>
</property>
@ -270,10 +273,10 @@
<item row="3" column="0">
<widget class="QLabel" name="label_7">
<property name="text">
<string>Maximum &amp;view width:</string>
<string>Maximum text width in &amp;fullscreen:</string>
</property>
<property name="buddy">
<cstring>max_view_width</cstring>
<cstring>max_fs_width</cstring>
</property>
</widget>
</item>
@ -350,7 +353,7 @@
<tabstop>serif_family</tabstop>
<tabstop>sans_family</tabstop>
<tabstop>mono_family</tabstop>
<tabstop>max_view_width</tabstop>
<tabstop>max_fs_width</tabstop>
<tabstop>opt_remember_window_size</tabstop>
<tabstop>buttonBox</tabstop>
</tabstops>

View File

@ -12,7 +12,7 @@ from PyQt4.Qt import (QSize, QSizePolicy, QUrl, SIGNAL, Qt, QTimer,
QPainter, QPalette, QBrush, QFontDatabase, QDialog,
QColor, QPoint, QImage, QRegion, QVariant, QIcon,
QFont, pyqtSignature, QAction, QByteArray, QMenu,
pyqtSignal, QSwipeGesture)
pyqtSignal, QSwipeGesture, QApplication)
from PyQt4.QtWebKit import QWebPage, QWebView, QWebSettings
from calibre.utils.config import Config, StringConfig
@ -46,8 +46,10 @@ def config(defaults=None):
help=_('Remember last used window size'))
c.add_opt('user_css', default='',
help=_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
c.add_opt('max_view_width', default=6000,
help=_('Maximum width of the viewer window, in pixels.'))
c.add_opt('max_fs_width', default=800,
help=_("Set the maximum width that the book's text and pictures will take"
" when in fullscreen mode. This allows you to read the book text"
" without it becoming too wide."))
c.add_opt('fit_images', default=True,
help=_('Resize images larger than the viewer window to fit inside it'))
c.add_opt('hyphenate', default=False, help=_('Hyphenate text'))
@ -101,7 +103,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
self.standard_font.setCurrentIndex({'serif':0, 'sans':1, 'mono':2}[opts.standard_font])
self.css.setPlainText(opts.user_css)
self.css.setToolTip(_('Set the user CSS stylesheet. This can be used to customize the look of all books.'))
self.max_view_width.setValue(opts.max_view_width)
self.max_fs_width.setValue(opts.max_fs_width)
with zipfile.ZipFile(P('viewer/hyphenate/patterns.zip',
allow_user_override=False), 'r') as zf:
pats = [x.split('.')[0].replace('-', '_') for x in zf.namelist()]
@ -144,7 +146,7 @@ class ConfigDialog(QDialog, Ui_Dialog):
c.set('user_css', unicode(self.css.toPlainText()))
c.set('remember_window_size', self.opt_remember_window_size.isChecked())
c.set('fit_images', self.opt_fit_images.isChecked())
c.set('max_view_width', int(self.max_view_width.value()))
c.set('max_fs_width', int(self.max_fs_width.value()))
c.set('hyphenate', self.hyphenate.isChecked())
c.set('remember_current_page', self.opt_remember_current_page.isChecked())
c.set('wheel_flips_pages', self.opt_wheel_flips_pages.isChecked())
@ -192,6 +194,8 @@ class Document(QWebPage): # {{{
self.loaded_javascript = False
self.js_loader = JavaScriptLoader(
dynamic_coffeescript=self.debug_javascript)
self.initial_left_margin = self.initial_right_margin = u''
self.in_fullscreen_mode = False
self.setLinkDelegationPolicy(self.DelegateAllLinks)
self.scroll_marks = []
@ -239,6 +243,9 @@ class Document(QWebPage): # {{{
self.enable_page_flip = self.page_flip_duration > 0.1
self.font_magnification_step = opts.font_magnification_step
self.wheel_flips_pages = opts.wheel_flips_pages
screen_width = QApplication.desktop().screenGeometry().width()
# Leave some space for the scrollbar and some border
self.max_fs_width = min(opts.max_fs_width, screen_width-50)
def fit_images(self):
if self.do_fit_images:
@ -274,6 +281,30 @@ class Document(QWebPage): # {{{
self.set_bottom_padding(0)
self.fit_images()
self.init_hyphenate()
self.initial_left_margin = unicode(self.javascript(
'document.body.style.marginLeft').toString())
self.initial_right_margin = unicode(self.javascript(
'document.body.style.marginRight').toString())
if self.in_fullscreen_mode:
self.switch_to_fullscreen_mode()
def switch_to_fullscreen_mode(self):
self.in_fullscreen_mode = True
self.javascript('''
var s = document.body.style;
s.maxWidth = "%dpx";
s.marginLeft = "auto";
s.marginRight = "auto";
'''%self.max_fs_width)
def switch_to_window_mode(self):
self.in_fullscreen_mode = False
self.javascript('''
var s = document.body.style;
s.maxWidth = "none";
s.marginLeft = "%s";
s.marginRight = "%s";
'''%(self.initial_left_margin, self.initial_right_margin))
@pyqtSignature("QString")
def debug(self, msg):
@ -581,8 +612,8 @@ class DocumentView(QWebView): # {{{
def config(self, parent=None):
self.document.do_config(parent)
if self.manager is not None:
self.manager.set_max_width()
if self.document.in_fullscreen_mode:
self.document.switch_to_fullscreen_mode()
self.setFocus(Qt.OtherFocusReason)
def bookmark(self):
@ -602,6 +633,9 @@ class DocumentView(QWebView): # {{{
menu.insertAction(list(menu.actions())[0], self.search_action)
menu.addSeparator()
menu.addAction(self.goto_location_action)
if self.document.in_fullscreen_mode and self.manager is not None:
menu.addSeparator()
menu.addAction(self.manager.toggle_toolbar_action)
menu.exec_(ev.globalPos())
def lookup(self, *args):

View File

@ -5,11 +5,11 @@ import traceback, os, sys, functools, collections, re
from functools import partial
from threading import Thread
from PyQt4.Qt import QApplication, Qt, QIcon, QTimer, SIGNAL, QByteArray, \
QDoubleSpinBox, QLabel, QTextBrowser, \
QPainter, QBrush, QColor, QStandardItemModel, QPalette, \
QStandardItem, QUrl, QRegExpValidator, QRegExp, QLineEdit, \
QToolButton, QMenu, QInputDialog, QAction, QKeySequence
from PyQt4.Qt import (QApplication, Qt, QIcon, QTimer, SIGNAL, QByteArray,
QSize, QDoubleSpinBox, QLabel, QTextBrowser, QPropertyAnimation,
QPainter, QBrush, QColor, QStandardItemModel, QPalette, QStandardItem,
QUrl, QRegExpValidator, QRegExp, QLineEdit, QToolButton, QMenu,
QInputDialog, QAction, QKeySequence)
from calibre.gui2.viewer.main_ui import Ui_EbookViewer
from calibre.gui2.viewer.printing import Printing
@ -55,8 +55,6 @@ class TOC(QStandardItemModel):
self.appendRow(TOCItem(t))
self.setHorizontalHeaderItem(0, QStandardItem(_('Table of Contents')))
class Worker(Thread):
def run(self):
@ -292,6 +290,37 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.tool_bar2.setContextMenuPolicy(Qt.PreventContextMenu)
self.tool_bar.widgetForAction(self.action_bookmark).setPopupMode(QToolButton.MenuButtonPopup)
self.action_full_screen.setCheckable(True)
self.full_screen_label = QLabel('''
<center>
<h1>%s</h1>
<h3>%s</h3>
<h3>%s</h3>
</center>
'''%(_('Full screen mode'),
_('Right click to show controls'),
_('Press Esc to quit')),
self)
self.full_screen_label.setVisible(False)
self.full_screen_label.setStyleSheet('''
QLabel {
text-align: center;
background-color: white;
color: black;
border-width: 1px;
border-style: solid;
border-radius: 20px;
}
''')
self.toggle_toolbar_action = QAction(_('Show/hide controls'), self)
self.toggle_toolbar_action.triggered.connect(self.toggle_toolbars)
self.addAction(self.toggle_toolbar_action)
self.full_screen_label_anim = QPropertyAnimation(
self.full_screen_label, 'size')
self.esc_full_screen_action = a = QAction(self)
self.addAction(a)
a.setShortcut(Qt.Key_Escape)
a.setEnabled(False)
a.triggered.connect(self.action_full_screen.trigger)
self.print_menu = QMenu()
self.print_menu.addAction(QIcon(I('print-preview.png')), _('Print Preview'))
@ -299,7 +328,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self.tool_bar.widgetForAction(self.action_print).setPopupMode(QToolButton.MenuButtonPopup)
self.connect(self.action_print, SIGNAL("triggered(bool)"), partial(self.print_book, preview=False))
self.connect(self.print_menu.actions()[0], SIGNAL("triggered(bool)"), partial(self.print_book, preview=True))
self.set_max_width()
ca = self.view.copy_action
ca.setShortcut(QKeySequence.Copy)
self.addAction(ca)
@ -313,6 +341,13 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
w = self.tool_bar.widgetForAction(self.action_open_ebook)
w.setPopupMode(QToolButton.MenuButtonPopup)
for x in ('tool_bar', 'tool_bar2'):
x = getattr(self, x)
for action in x.actions():
# So that the keyboard shortcuts for these actions will
# continue to function even when the toolbars are hidden
self.addAction(action)
self.restore_state()
def set_toc_visible(self, yes):
@ -338,9 +373,18 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
count += 1
def closeEvent(self, e):
if self.isFullScreen():
self.action_full_screen.trigger()
e.ignore()
return
self.save_state()
return MainWindow.closeEvent(self, e)
def toggle_toolbars(self):
for x in ('tool_bar', 'tool_bar2'):
x = getattr(self, x)
x.setVisible(not x.isVisible())
def save_state(self):
state = bytearray(self.saveState(self.STATE_VERSION))
vprefs['viewer_toolbar_state'] = state
@ -382,11 +426,6 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
self._lookup = None
self.dictionary_view.setHtml(html)
def set_max_width(self):
from calibre.gui2.viewer.documentview import config
c = config().parse()
self.frame.setMaximumWidth(c.max_view_width)
def get_remember_current_page_opt(self):
from calibre.gui2.viewer.documentview import config
c = config().parse()
@ -401,6 +440,46 @@ class EbookViewer(MainWindow, Ui_EbookViewer):
else:
self.showFullScreen()
def showFullScreen(self):
self.tool_bar.setVisible(False)
self.tool_bar2.setVisible(False)
self._original_frame_margins = (
self.centralwidget.layout().contentsMargins(),
self.frame.layout().contentsMargins())
self.frame.layout().setContentsMargins(0, 0, 0, 0)
self.centralwidget.layout().setContentsMargins(0, 0, 0, 0)
super(EbookViewer, self).showFullScreen()
QTimer.singleShot(10, self.show_full_screen_label)
def show_full_screen_label(self):
f = self.full_screen_label
self.esc_full_screen_action.setEnabled(True)
f.setVisible(True)
height = 200
width = int(0.7*self.view.width())
f.resize(width, height)
f.move((self.view.width() - width)//2, (self.view.height()-height)//2)
a = self.full_screen_label_anim
a.setDuration(500)
a.setStartValue(QSize(width, 0))
a.setEndValue(QSize(width, height))
a.start()
QTimer.singleShot(2750, self.full_screen_label.hide)
self.view.document.switch_to_fullscreen_mode()
def showNormal(self):
self.esc_full_screen_action.setEnabled(False)
self.tool_bar.setVisible(True)
self.tool_bar2.setVisible(True)
self.full_screen_label.setVisible(False)
if hasattr(self, '_original_frame_margins'):
om = self._original_frame_margins
self.centralwidget.layout().setContentsMargins(om[0])
self.frame.layout().setContentsMargins(om[1])
super(EbookViewer, self).showNormal()
self.view.document.switch_to_window_mode()
def goto(self, ref):
if ref:
tokens = ref.split('.')

View File

@ -284,6 +284,9 @@
<property name="text">
<string>Toggle full screen</string>
</property>
<property name="toolTip">
<string>Toggle full screen (F11)</string>
</property>
</action>
<action name="action_print">
<property name="icon">

View File

@ -15,6 +15,7 @@ from PyQt4.Qt import (QIcon, QFont, QLabel, QListWidget, QAction,
QMenu, QStringListModel, QCompleter, QStringList,
QTimer, QRect, QFontDatabase, QGraphicsView)
from calibre.constants import iswindows
from calibre.gui2 import (NONE, error_dialog, pixmap_to_data, gprefs,
warning_dialog)
from calibre.gui2.filename_pattern_ui import Ui_Form
@ -365,7 +366,7 @@ class FontFamilyModel(QAbstractListModel): # {{{
self.families = list(qt_families.intersection(set(self.families)))
self.families.sort()
self.families[:0] = [_('None')]
self.font = QFont('sansserif')
self.font = QFont('Verdana' if iswindows else 'sansserif')
def rowCount(self, *args):
return len(self.families)

View File

@ -591,6 +591,21 @@ def educateQuotes(str):
str = re.sub(r'''""''', """&#8221;&#8221;""", str)
str = re.sub(r"""''""", """&#8217;&#8217;""", str)
# Special case for Quotes at inside of other entities, e.g.:
# <p>A double quote--"within dashes"--would be nice.</p>
str = re.sub(r"""(?<=\W)"(?=\w)""", r"""&#8220;""", str)
str = re.sub(r"""(?<=\W)'(?=\w)""", r"""&#8216;""", str)
str = re.sub(r"""(?<=\w)"(?=\W)""", r"""&#8221;""", str)
str = re.sub(r"""(?<=\w)'(?=\W)""", r"""&#8217;""", str)
# Special case for Quotes at end of line with a preceeding space (may change just to end of line)
str = re.sub(r"""(?<=\s)"$""", r"""&#8221;""", str)
str = re.sub(r"""(?<=\s)'$""", r"""&#8217;""", str)
# Special case for Quotes at beginning of line with a space - multiparagraph quoted text:
str = re.sub(r"""^"(?=\s)""", r"""&#8220;""", str)
str = re.sub(r"""^'(?=\s)""", r"""&#8216;""", str)
# Special case for decade abbreviations (the '80s):
str = re.sub(r"""\b'(?=\d{2}s)""", r"""&#8217;""", str)