Merge from trunk

This commit is contained in:
Charles Haley 2012-03-13 10:59:46 +01:00
commit 07033edff2
10 changed files with 111 additions and 3031 deletions

View File

@ -52,7 +52,7 @@ class MOBIInput(InputFormatPlugin):
mr.extract_content(u'.', parse_cache) mr.extract_content(u'.', parse_cache)
if mr.kf8_type is not None: if mr.kf8_type is not None:
log('Found KF8 MOBI of type %s'%mr.kf8_type) log('Found KF8 MOBI of type %r'%mr.kf8_type)
from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader from calibre.ebooks.mobi.reader.mobi8 import Mobi8Reader
return os.path.abspath(Mobi8Reader(mr, log)()) return os.path.abspath(Mobi8Reader(mr, log)())

View File

@ -18,9 +18,6 @@ class MOBIOutput(OutputFormatPlugin):
file_type = 'mobi' file_type = 'mobi'
options = set([ options = set([
OptionRecommendation(name='rescale_images', recommended_value=False,
help=_('Modify images to meet Palm device size limitations.')
),
OptionRecommendation(name='prefer_author_sort', OptionRecommendation(name='prefer_author_sort',
recommended_value=False, level=OptionRecommendation.LOW, recommended_value=False, level=OptionRecommendation.LOW,
help=_('When present, use author sort field as author.') help=_('When present, use author sort field as author.')
@ -167,12 +164,7 @@ class MOBIOutput(OutputFormatPlugin):
mobimlizer(oeb, opts) mobimlizer(oeb, opts)
self.check_for_periodical() self.check_for_periodical()
write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz') write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
from calibre.utils.config import tweaks from calibre.ebooks.mobi.writer2.main import MobiWriter
if tweaks.get('new_mobi_writer', True):
from calibre.ebooks.mobi.writer2.main import MobiWriter
MobiWriter
else:
from calibre.ebooks.mobi.writer import MobiWriter
writer = MobiWriter(opts, writer = MobiWriter(opts,
write_page_breaks_after_item=write_page_breaks_after_item) write_page_breaks_after_item=write_page_breaks_after_item)
writer(oeb, output_path) writer(oeb, output_path)

View File

@ -9,16 +9,21 @@ __copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net and ' \
'Marshall T. Vandegrift <llasram@gmail.com>' 'Marshall T. Vandegrift <llasram@gmail.com>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os, cStringIO import os, cStringIO, imghdr
from struct import pack, unpack from struct import pack, unpack
from cStringIO import StringIO from cStringIO import StringIO
from calibre.ebooks import normalize from calibre.ebooks import normalize
from calibre.ebooks.mobi import MobiError from calibre.ebooks.mobi import MobiError, MAX_THUMB_DIMEN
from calibre.ebooks.mobi.writer import rescale_image, MAX_THUMB_DIMEN from calibre.ebooks.mobi.utils import rescale_image
from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.mobi.langcodes import iana2mobi
from calibre.utils.date import now as nowf from calibre.utils.date import now as nowf
def is_image(ss):
    """Report whether ``ss`` begins with a recognisable image header.

    ``ss`` may be ``None`` (no record), in which case the answer is
    ``False``. Otherwise only the first 200 bytes are handed to
    ``imghdr.what`` for sniffing.
    """
    if ss is not None:
        detected = imghdr.what(None, ss[:200])
        return detected is not None
    return False
class StreamSlicer(object): class StreamSlicer(object):
def __init__(self, stream, start=0, stop=None): def __init__(self, stream, start=0, stop=None):
@ -161,11 +166,10 @@ class MetadataUpdater(object):
if id == 106: if id == 106:
self.timestamp = content self.timestamp = content
elif id == 201: elif id == 201:
rindex, = self.cover_rindex, = unpack('>i', content) rindex, = self.cover_rindex, = unpack('>I', content)
if rindex > 0 : self.cover_record = self.record(rindex + image_base)
self.cover_record = self.record(rindex + image_base)
elif id == 202: elif id == 202:
rindex, = self.thumbnail_rindex, = unpack('>i', content) rindex, = self.thumbnail_rindex, = unpack('>I', content)
if rindex > 0 : if rindex > 0 :
self.thumbnail_record = self.record(rindex + image_base) self.thumbnail_record = self.record(rindex + image_base)
@ -416,17 +420,17 @@ class MetadataUpdater(object):
except: except:
pass pass
else: else:
if self.cover_record is not None: if is_image(self.cover_record):
size = len(self.cover_record) size = len(self.cover_record)
cover = rescale_image(data, size) cover = rescale_image(data, size)
if len(cover) <= size: if len(cover) <= size:
cover += '\0' * (size - len(cover)) cover += b'\0' * (size - len(cover))
self.cover_record[:] = cover self.cover_record[:] = cover
if self.thumbnail_record is not None: if is_image(self.thumbnail_record):
size = len(self.thumbnail_record) size = len(self.thumbnail_record)
thumbnail = rescale_image(data, size, dimen=MAX_THUMB_DIMEN) thumbnail = rescale_image(data, size, dimen=MAX_THUMB_DIMEN)
if len(thumbnail) <= size: if len(thumbnail) <= size:
thumbnail += '\0' * (size - len(thumbnail)) thumbnail += b'\0' * (size - len(thumbnail))
self.thumbnail_record[:] = thumbnail self.thumbnail_record[:] = thumbnail
return return

View File

@ -6,3 +6,8 @@ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
class MobiError(Exception): class MobiError(Exception):
pass pass
MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240)

View File

@ -7,7 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import struct, datetime, sys, os, shutil import struct, datetime, sys, os, shutil, zlib
from collections import OrderedDict, defaultdict from collections import OrderedDict, defaultdict
from lxml import html from lxml import html
@ -1149,6 +1149,38 @@ class BinaryRecord(object): # {{{
# }}} # }}}
class FontRecord(object): # {{{
    """Decoded view of a single ``FONT`` record, suitable for dumping.

    Attributes set by ``__init__``:

    * ``raw`` -- the untouched record bytes.
    * ``uncompressed_size``, ``unknown1``, ``unknown2`` -- the three
      big-endian 32-bit header fields that follow the 4-byte record tag.
    * ``payload`` -- the font data (decompressed when ``unknown1 == 1``,
      otherwise everything after the record tag).
    * ``ext`` -- ``'ttf'`` when the payload starts with a known font
      signature, else ``'unknown'``.
    * ``name`` -- file name used by :meth:`dump`.
    * ``zlib_header`` -- only set on the compressed path.
    """

    def __init__(self, idx, record):
        """Parse ``record.raw`` and extract the font payload.

        :param idx: Integer record number; zero-padded to six digits to
            form the stem of ``self.name``.
        :param record: Object exposing the record bytes as ``record.raw``.
        :raises ValueError: If the decompressed payload length differs from
            the size stored in the record header.
        """
        self.raw = record.raw

        name = '%06d'%idx
        # Skip the 4-byte record tag and read the three header fields.
        (self.uncompressed_size, self.unknown1, self.unknown2) = \
                struct.unpack_from(b'>LLL', self.raw, 4)
        self.payload = self.raw[4:]
        self.ext = 'unknown'
        if self.unknown1 == 1:
            # unknown2 is used as the byte offset of a 2-byte zlib header;
            # the last 4 bytes of the record are excluded as well, leaving
            # a raw deflate stream (hence the negative window size).
            self.zlib_header = self.raw[self.unknown2:self.unknown2+2]
            self.payload = zlib.decompress(self.raw[self.unknown2+2:-4], -15)
            hdr = self.payload[:4]
            if hdr in {b'\0\1\0\0', b'true', b'ttcf'}:
                self.ext = 'ttf'
            if self.uncompressed_size != len(self.payload):
                # Single message string: the original passed two arguments
                # to ValueError, which rendered as a tuple.
                raise ValueError('Font record uncompressed size mismatch'
                        ' expected: %d actual: %d'%(self.uncompressed_size,
                            len(self.payload)))
        else:
            # Layout not understood: report the header fields and keep the
            # undecoded bytes as the payload.
            print ('Unknown font record with fields: %s' %
                    [self.uncompressed_size, self.unknown1, self.unknown2])
            print ('\tAdditional fields: %s'%((
                struct.unpack_from(b'>LL', self.raw, 16),)))

        self.name = '%s.%s'%(name, self.ext)

    def dump(self, folder):
        """Write the payload to ``folder`` under ``self.name``."""
        with open(os.path.join(folder, self.name), 'wb') as f:
            f.write(self.payload)

# }}}
class TBSIndexing(object): # {{{ class TBSIndexing(object): # {{{
def __init__(self, text_records, indices, doc_type): def __init__(self, text_records, indices, doc_type):
@ -1410,6 +1442,7 @@ class MOBIFile(object): # {{{
self.mobi_header.extra_data_flags, decompress) for r in xrange(1, self.mobi_header.extra_data_flags, decompress) for r in xrange(1,
min(len(self.records), ntr+1))] min(len(self.records), ntr+1))]
self.image_records, self.binary_records = [], [] self.image_records, self.binary_records = [], []
self.font_records = []
image_index = 0 image_index = 0
for i in xrange(fntbr, len(self.records)): for i in xrange(fntbr, len(self.records)):
if i in self.indexing_record_nums or i in self.huffman_record_nums: if i in self.indexing_record_nums or i in self.huffman_record_nums:
@ -1419,13 +1452,15 @@ class MOBIFile(object): # {{{
fmt = None fmt = None
if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS', if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP', b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
b'AUDI', b'VIDE'}: b'AUDI', b'VIDE', b'FONT'}:
try: try:
width, height, fmt = identify_data(r.raw) width, height, fmt = identify_data(r.raw)
except: except:
pass pass
if fmt is not None: if fmt is not None:
self.image_records.append(ImageRecord(image_index, r, fmt)) self.image_records.append(ImageRecord(image_index, r, fmt))
elif r.raw[:4] == b'FONT':
self.font_records.append(FontRecord(i, r))
else: else:
self.binary_records.append(BinaryRecord(i, r)) self.binary_records.append(BinaryRecord(i, r))
@ -1465,10 +1500,11 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{
of.write(rec.raw) of.write(rec.raw)
alltext += rec.raw alltext += rec.raw
of.seek(0) of.seek(0)
root = html.fromstring(alltext.decode('utf-8')) if f.mobi_header.file_version < 8:
with open(os.path.join(ddir, 'pretty.html'), 'wb') as of: root = html.fromstring(alltext.decode('utf-8'))
of.write(html.tostring(root, pretty_print=True, encoding='utf-8', with open(os.path.join(ddir, 'pretty.html'), 'wb') as of:
include_meta_content_type=True)) of.write(html.tostring(root, pretty_print=True, encoding='utf-8',
include_meta_content_type=True))
if f.index_header is not None: if f.index_header is not None:
@ -1490,7 +1526,7 @@ def inspect_mobi(path_or_stream, ddir=None): # {{{
f.tbs_indexing.dump(ddir) f.tbs_indexing.dump(ddir)
for tdir, attr in [('text', 'text_records'), ('images', 'image_records'), for tdir, attr in [('text', 'text_records'), ('images', 'image_records'),
('binary', 'binary_records')]: ('binary', 'binary_records'), ('font', 'font_records')]:
tdir = os.path.join(ddir, tdir) tdir = os.path.join(ddir, tdir)
os.mkdir(tdir) os.mkdir(tdir)
for rec in getattr(f, attr): for rec in getattr(f, attr):

View File

@ -348,10 +348,10 @@ class Mobi8Reader(object):
# bytes 16 - 23: ?? typically all 0x00 ?? Are these compression flags from zlib? # bytes 16 - 23: ?? typically all 0x00 ?? Are these compression flags from zlib?
# The compressed data begins with 2 bytes of header and has 4 bytes of checksum at the end # The compressed data begins with 2 bytes of header and has 4 bytes of checksum at the end
try: try:
fields = struct.unpack_from(b'>LLLL', data, 4) fields = struct.unpack_from(b'>LLLLL', data, 4)
except: except:
fields = None fields = None
#self.log.debug('Font record fields: %s'%(fields,)) # self.log.debug('Font record fields: %s'%(fields,))
cdata = data[26:-4] cdata = data[26:-4]
ext = 'dat' ext = 'dat'
try: try:
@ -361,11 +361,13 @@ class Mobi8Reader(object):
'Fields: %s' % (fname_idx, fields,)) 'Fields: %s' % (fname_idx, fields,))
uncompressed_data = data[4:] uncompressed_data = data[4:]
ext = 'failed' ext = 'failed'
hdr = uncompressed_data[0:4]
if len(uncompressed_data) < 200: if len(uncompressed_data) < 200:
self.log.warn('Corrupted font record: %d'%fname_idx) self.log.warn('Failed to uncompress embedded font %d: '
'Fields: %s' % (fname_idx, fields,))
uncompressed_data = data[4:]
ext = 'failed' ext = 'failed'
if hdr == b'\0\1\0\0' or hdr == b'true' or hdr == b'ttcf': hdr = uncompressed_data[:4]
if ext != 'failed' and hdr in {b'\0\1\0\0', b'true', b'ttcf'}:
ext = 'ttf' ext = 'ttf'
href = "fonts/%05d.%s" % (fname_idx, ext) href = "fonts/%05d.%s" % (fname_idx, ext)
with open(href.replace('/', os.sep), 'wb') as f: with open(href.replace('/', os.sep), 'wb') as f:

File diff suppressed because it is too large Load Diff

View File

@ -21,6 +21,7 @@ from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE)
from calibre.ebooks.mobi.utils import (rescale_image, encint, from calibre.ebooks.mobi.utils import (rescale_image, encint,
encode_trailing_data, align_block, detect_periodical) encode_trailing_data, align_block, detect_periodical)
from calibre.ebooks.mobi.writer2.indexer import Indexer from calibre.ebooks.mobi.writer2.indexer import Indexer
from calibre.ebooks.mobi import MAX_THUMB_DIMEN, MAX_THUMB_SIZE
EXTH_CODES = { EXTH_CODES = {
'creator': 100, 'creator': 100,
@ -46,9 +47,6 @@ EXTH_CODES = {
# Disabled as I dont care about uncrossable breaks # Disabled as I dont care about uncrossable breaks
WRITE_UNCROSSABLE_BREAKS = False WRITE_UNCROSSABLE_BREAKS = False
MAX_THUMB_SIZE = 16 * 1024
MAX_THUMB_DIMEN = (180, 240)
class MobiWriter(object): class MobiWriter(object):
COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')

View File

@ -21,7 +21,7 @@ class PluginWidget(Widget, Ui_Form):
def __init__(self, parent, get_option, get_help, db=None, book_id=None): def __init__(self, parent, get_option, get_help, db=None, book_id=None):
Widget.__init__(self, parent, Widget.__init__(self, parent,
['prefer_author_sort', 'rescale_images', 'toc_title', ['prefer_author_sort', 'toc_title',
'mobi_ignore_margins', 'mobi_toc_at_start', 'mobi_ignore_margins', 'mobi_toc_at_start',
'dont_compress', 'no_inline_toc', 'share_not_sync', 'dont_compress', 'no_inline_toc', 'share_not_sync',
'personal_doc']#, 'mobi_navpoints_only_deepest'] 'personal_doc']#, 'mobi_navpoints_only_deepest']

View File

@ -6,7 +6,7 @@
<rect> <rect>
<x>0</x> <x>0</x>
<y>0</y> <y>0</y>
<width>521</width> <width>588</width>
<height>342</height> <height>342</height>
</rect> </rect>
</property> </property>
@ -14,48 +14,7 @@
<string>Form</string> <string>Form</string>
</property> </property>
<layout class="QGridLayout" name="gridLayout"> <layout class="QGridLayout" name="gridLayout">
<item row="1" column="0"> <item row="7" column="0" colspan="2">
<widget class="QLabel" name="label">
<property name="text">
<string>&amp;Title for Table of Contents:</string>
</property>
<property name="buddy">
<cstring>opt_toc_title</cstring>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="opt_toc_title"/>
</item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_rescale_images">
<property name="text">
<string>Rescale images for &amp;Palm devices</string>
</property>
</widget>
</item>
<item row="5" column="0" colspan="2">
<widget class="QCheckBox" name="opt_prefer_author_sort">
<property name="text">
<string>Use author &amp;sort for author</string>
</property>
</widget>
</item>
<item row="6" column="0">
<widget class="QCheckBox" name="opt_dont_compress">
<property name="text">
<string>Disable compression of the file contents</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="opt_no_inline_toc">
<property name="text">
<string>Do not add Table of Contents to book</string>
</property>
</widget>
</item>
<item row="8" column="0" colspan="2">
<widget class="QGroupBox" name="groupBox"> <widget class="QGroupBox" name="groupBox">
<property name="title"> <property name="title">
<string>Kindle options</string> <string>Kindle options</string>
@ -98,7 +57,7 @@
</layout> </layout>
</widget> </widget>
</item> </item>
<item row="9" column="0"> <item row="8" column="0">
<spacer name="verticalSpacer_2"> <spacer name="verticalSpacer_2">
<property name="orientation"> <property name="orientation">
<enum>Qt::Vertical</enum> <enum>Qt::Vertical</enum>
@ -125,6 +84,40 @@
</property> </property>
</widget> </widget>
</item> </item>
<item row="4" column="0" colspan="2">
<widget class="QCheckBox" name="opt_prefer_author_sort">
<property name="text">
<string>Use author &amp;sort for author</string>
</property>
</widget>
</item>
<item row="1" column="0">
<widget class="QLabel" name="label">
<property name="text">
<string>&amp;Title for Table of Contents:</string>
</property>
<property name="buddy">
<cstring>opt_toc_title</cstring>
</property>
</widget>
</item>
<item row="1" column="1">
<widget class="QLineEdit" name="opt_toc_title"/>
</item>
<item row="5" column="0">
<widget class="QCheckBox" name="opt_dont_compress">
<property name="text">
<string>Disable compression of the file contents</string>
</property>
</widget>
</item>
<item row="0" column="0">
<widget class="QCheckBox" name="opt_no_inline_toc">
<property name="text">
<string>Do not add Table of Contents to book</string>
</property>
</widget>
</item>
</layout> </layout>
</widget> </widget>
<resources/> <resources/>