From 0930375551028af494baa8e3c50781a812dff0fd Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 31 Jul 2011 12:38:01 -0600 Subject: [PATCH 1/6] New MOBI Output: Clean up image handling --- src/calibre/ebooks/__init__.py | 42 +++++++ src/calibre/ebooks/mobi/writer2/indexer.py | 12 +- src/calibre/ebooks/mobi/writer2/main.py | 131 ++++++++++----------- src/calibre/web/feeds/news.py | 37 +----- 4 files changed, 114 insertions(+), 108 deletions(-) diff --git a/src/calibre/ebooks/__init__.py b/src/calibre/ebooks/__init__.py index c1dee85d3e..50ad2b0b50 100644 --- a/src/calibre/ebooks/__init__.py +++ b/src/calibre/ebooks/__init__.py @@ -8,6 +8,7 @@ from various formats. ''' import traceback, os, re +from cStringIO import StringIO from calibre import CurrentDir class ConversionError(Exception): @@ -209,4 +210,45 @@ def unit_convert(value, base, font, dpi): result = value * 0.40 return result +def generate_masthead(title, output_path=None, width=600, height=60): + from calibre.ebooks.conversion.config import load_defaults + from calibre.utils.fonts import fontconfig + font_path = default_font = P('fonts/liberation/LiberationSerif-Bold.ttf') + recs = load_defaults('mobi_output') + masthead_font_family = recs.get('masthead_font', 'Default') + + if masthead_font_family != 'Default': + masthead_font = fontconfig.files_for_family(masthead_font_family) + # Assume 'normal' always in dict, else use default + # {'normal': (path_to_font, friendly name)} + if 'normal' in masthead_font: + font_path = masthead_font['normal'][0] + + if not font_path or not os.access(font_path, os.R_OK): + font_path = default_font + + try: + from PIL import Image, ImageDraw, ImageFont + Image, ImageDraw, ImageFont + except ImportError: + import Image, ImageDraw, ImageFont + + img = Image.new('RGB', (width, height), 'white') + draw = ImageDraw.Draw(img) + try: + font = ImageFont.truetype(font_path, 48) + except: + font = ImageFont.truetype(default_font, 48) + text = title.encode('utf-8') + width, 
height = draw.textsize(text, font=font) + left = max(int((img.size[0] - width)/2.), 0) + top = max(int((img.size[1] - height)/2.), 0) + draw.text((left, top), text, fill=(0,0,0), font=font) + if output_path is None: + f = StringIO() + img.save(f, 'JPEG') + return f.getvalue() + else: + with open(output_path, 'wb') as f: + img.save(f, 'JPEG') diff --git a/src/calibre/ebooks/mobi/writer2/indexer.py b/src/calibre/ebooks/mobi/writer2/indexer.py index 15207c0230..54f74c1664 100644 --- a/src/calibre/ebooks/mobi/writer2/indexer.py +++ b/src/calibre/ebooks/mobi/writer2/indexer.py @@ -14,7 +14,7 @@ from collections import OrderedDict, defaultdict from calibre.ebooks.mobi.writer2 import RECORD_SIZE from calibre.ebooks.mobi.utils import (encint, encode_number_as_hex, - encode_tbs, align_block, utf8_text, detect_periodical) + encode_tbs, align_block, utf8_text) class CNCX(object): # {{{ @@ -323,16 +323,22 @@ class TBS(object): # {{{ class Indexer(object): # {{{ def __init__(self, serializer, number_of_text_records, - size_of_last_text_record, opts, oeb): + size_of_last_text_record, masthead_offset, is_periodical, + opts, oeb): self.serializer = serializer self.number_of_text_records = number_of_text_records self.text_size = (RECORD_SIZE * (self.number_of_text_records-1) + size_of_last_text_record) + self.masthead_offset = masthead_offset + self.oeb = oeb self.log = oeb.log self.opts = opts - self.is_periodical = detect_periodical(self.oeb.toc, self.log) + self.is_periodical = is_periodical + if self.is_periodical and self.masthead_offset is None: + raise ValueError('Periodicals must have a masthead') + self.log('Generating MOBI index for a %s'%('periodical' if self.is_periodical else 'book')) self.is_flat_periodical = False diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index 881f2bd199..e232e626ce 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -11,7 +11,7 @@ import re, random, time 
from cStringIO import StringIO from struct import pack -from calibre.ebooks import normalize +from calibre.ebooks import normalize, generate_masthead from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES from calibre.ebooks.mobi.writer2.serializer import Serializer from calibre.ebooks.compression.palmdoc import compress_doc @@ -19,7 +19,7 @@ from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.utils.filenames import ascii_filename from calibre.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED, RECORD_SIZE) from calibre.ebooks.mobi.utils import (rescale_image, encint, - encode_trailing_data, align_block) + encode_trailing_data, align_block, detect_periodical) from calibre.ebooks.mobi.writer2.indexer import Indexer EXTH_CODES = { @@ -35,6 +35,9 @@ EXTH_CODES = { 'type': 111, 'source': 112, 'versionnumber': 114, + 'coveroffset': 201, + 'thumboffset': 202, + 'hasfakecover': 203, 'lastupdatetime': 502, 'title': 503, } @@ -79,13 +82,12 @@ class MobiWriter(object): self.write_content() def generate_content(self): - self.map_image_names() + self.is_periodical = detect_periodical(self.oeb.toc, self.oeb.log) + self.generate_images() self.generate_text() # Index records come after text records self.generate_index() self.write_uncrossable_breaks() - # Image records come after index records - self.generate_images() # Indexing {{{ def generate_index(self): @@ -93,6 +95,7 @@ class MobiWriter(object): try: self.indexer = Indexer(self.serializer, self.last_text_record_idx, len(self.records[self.last_text_record_idx]), + self.masthead_offset, self.is_periodical, self.opts, self.oeb) except: self.log.exception('Failed to generate MOBI index:') @@ -104,11 +107,6 @@ class MobiWriter(object): self.records[i] += encode_trailing_data(tbs) self.records.extend(self.indexer.records) - @property - def is_periodical(self): - return (self.primary_index_record_idx is None or not - self.indexer.is_periodical) - # }}} def write_uncrossable_breaks(self): # {{{ @@ -138,58 +136,51 @@ 
class MobiWriter(object): # }}} # Images {{{ - def map_image_names(self): - ''' - Map image names to record indices, ensuring that the masthead image if - present has index number 1. - ''' - index = 1 - self.images = images = {} - mh_href = None - - if 'masthead' in self.oeb.guide: - mh_href = self.oeb.guide['masthead'].href - images[mh_href] = 1 - index += 1 - - for item in self.oeb.manifest.values(): - if item.media_type in OEB_RASTER_IMAGES: - if item.href == mh_href: continue - images[item.href] = index - index += 1 def generate_images(self): - self.oeb.logger.info('Serializing images...') - images = [(index, href) for href, index in self.images.iteritems()] - images.sort() - self.first_image_record = None - for _, href in images: - item = self.oeb.manifest.hrefs[href] + oeb = self.oeb + oeb.logger.info('Serializing images...') + self.image_records = [] + + mh_href = self.masthead_offset = None + if 'masthead' in oeb.guide: + mh_href = oeb.guide['masthead'].href + elif self.is_periodical: + # Generate a default masthead + data = generate_masthead(unicode(self.oeb.metadata.title[0])) + self.image_records.append(data) + self.masthead_offset = 0 + + cover_href = self.cover_offset = self.thumbnail_offset = None + if (oeb.metadata.cover and + unicode(oeb.metadata.cover[0]) in oeb.manifest.ids): + cover_id = unicode(oeb.metadata.cover[0]) + item = oeb.manifest.ids[cover_id] + cover_href = item.href + + for item in self.oeb.manifest.values(): + if item.media_type not in OEB_RASTER_IMAGES: continue try: data = rescale_image(item.data) except: - self.oeb.logger.warn('Bad image file %r' % item.href) + oeb.logger.warn('Bad image file %r' % item.href) continue + else: + self.image_records.append(data) + if item.href == mh_href: + self.masthead_offset = len(self.image_records) - 1 + elif item.href == cover_href: + self.cover_offset = len(self.image_records) - 1 + try: + data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN, + maxsizeb=MAX_THUMB_SIZE) + except: + 
oeb.logger.warn('Failed to generate thumbnail') + else: + self.image_records.append(data) + self.thumbnail_offset = len(self.image_records) - 1 finally: item.unload_data_from_memory() - self.records.append(data) - if self.first_image_record is None: - self.first_image_record = len(self.records) - 1 - - def add_thumbnail(self, item): - try: - data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN, - maxsizeb=MAX_THUMB_SIZE) - except IOError: - self.oeb.logger.warn('Bad image file %r' % item.href) - return None - manifest = self.oeb.manifest - id, href = manifest.generate('thumbnail', 'thumbnail.jpeg') - manifest.add(id, href, 'image/jpeg', data=data) - index = len(self.images) + 1 - self.images[href] = index - self.records.append(data) - return index # }}} @@ -282,9 +273,13 @@ class MobiWriter(object): def generate_record0(self): # MOBI header {{{ metadata = self.oeb.metadata exth = self.build_exth() + first_image_record = None + if self.image_records: + first_image_record = len(self.records) + self.records.extend(self.image_records) last_content_record = len(self.records) - 1 - # FCIS/FLIS (Seem to server no purpose) + # FCIS/FLIS (Seems to serve no purpose) flis_number = len(self.records) self.records.append( b'FLIS\0\0\0\x08\0\x41\0\0\0\0\0\0\xff\xff\xff\xff\0\x01\0\x03\0\0\0\x03\0\0\0\x01'+ @@ -363,8 +358,7 @@ class MobiWriter(object): # 0x58 - 0x5b : Format version # 0x5c - 0x5f : First image record number record0.write(pack(b'>II', - 6, self.first_image_record if self.first_image_record else - len(self.records)-1)) + 6, first_image_record if first_image_record else len(self.records))) # 0x60 - 0x63 : First HUFF/CDIC record number # 0x64 - 0x67 : Number of HUFF/CDIC records @@ -539,20 +533,15 @@ class MobiWriter(object): exth.write(pack(b'>III', code, 12, val)) nrecs += 1 - if (oeb.metadata.cover and - unicode(oeb.metadata.cover[0]) in oeb.manifest.ids): - id = unicode(oeb.metadata.cover[0]) - item = oeb.manifest.ids[id] - href = item.href - if href in 
self.images: - index = self.images[href] - 1 - exth.write(pack(b'>III', 0xc9, 0x0c, index)) - exth.write(pack(b'>III', 0xcb, 0x0c, 0)) - nrecs += 2 - index = self.add_thumbnail(item) - if index is not None: - exth.write(pack(b'>III', 0xca, 0x0c, index - 1)) - nrecs += 1 + if self.cover_offset is not None: + exth.write(pack(b'>III', EXTH_CODES['coveroffset'], 12, + self.cover_offset)) + exth.write(pack(b'>III', EXTH_CODES['hasfakecover'], 12, 0)) + nrecs += 2 + if self.thumbnail_offset is not None: + exth.write(pack(b'>III', EXTH_CODES['thumboffset'], 12, + self.thumbnail_offset)) + nrecs += 1 exth = exth.getvalue() trail = len(exth) % 4 diff --git a/src/calibre/web/feeds/news.py b/src/calibre/web/feeds/news.py index 9773f6d0d8..2017248cfc 100644 --- a/src/calibre/web/feeds/news.py +++ b/src/calibre/web/feeds/news.py @@ -1083,40 +1083,9 @@ class BasicNewsRecipe(Recipe): MI_HEIGHT = 60 def default_masthead_image(self, out_path): - from calibre.ebooks.conversion.config import load_defaults - from calibre.utils.fonts import fontconfig - font_path = default_font = P('fonts/liberation/LiberationSerif-Bold.ttf') - recs = load_defaults('mobi_output') - masthead_font_family = recs.get('masthead_font', 'Default') - - if masthead_font_family != 'Default': - masthead_font = fontconfig.files_for_family(masthead_font_family) - # Assume 'normal' always in dict, else use default - # {'normal': (path_to_font, friendly name)} - if 'normal' in masthead_font: - font_path = masthead_font['normal'][0] - - if not font_path or not os.access(font_path, os.R_OK): - font_path = default_font - - try: - from PIL import Image, ImageDraw, ImageFont - Image, ImageDraw, ImageFont - except ImportError: - import Image, ImageDraw, ImageFont - - img = Image.new('RGB', (self.MI_WIDTH, self.MI_HEIGHT), 'white') - draw = ImageDraw.Draw(img) - try: - font = ImageFont.truetype(font_path, 48) - except: - font = ImageFont.truetype(default_font, 48) - text = self.get_masthead_title().encode('utf-8') - width, 
height = draw.textsize(text, font=font) - left = max(int((self.MI_WIDTH - width)/2.), 0) - top = max(int((self.MI_HEIGHT - height)/2.), 0) - draw.text((left, top), text, fill=(0,0,0), font=font) - img.save(open(out_path, 'wb'), 'JPEG') + from calibre.ebooks import generate_masthead + generate_masthead(self.get_masthead_title(), output_path=out_path, + width=self.MI_WIDTH, height=self.MI_HEIGHT) def prepare_masthead_image(self, path_to_image, out_path): from calibre import fit_image From cf3184d8cd4505c37b94a19f4e68a6751b3e3253 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 31 Jul 2011 12:48:07 -0600 Subject: [PATCH 2/6] MOBI Inspect: Ignore errors decoding TBS bytes --- src/calibre/ebooks/mobi/debug.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 91a203fa2b..91fc3d5e0f 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -1192,8 +1192,7 @@ class TBSIndexing(object): # {{{ '(%d ends, %d complete, %d starts)')%tuple(map(len, (s+e+c, e, c, s)))) byts = bytearray(r.trailing_data.get('indexing', b'')) - sbyts = tuple(hex(b)[2:] for b in byts) - ans.append('TBS bytes: %s'%(' '.join(sbyts))) + ans.append('TBS bytes: %s'%format_bytes(byts)) for typ, entries in (('Ends', e), ('Complete', c), ('Starts', s)): if entries: ans.append('\t%s:'%typ) @@ -1220,8 +1219,14 @@ class TBSIndexing(object): # {{{ ans.append('Outermost index: %d'%outermost_index) ans.append('Unknown extra start bytes: %s'%repr_extra(extra)) if is_periodical: # Hierarchical periodical - byts, a = self.interpret_periodical(tbs_type, byts, + try: + byts, a = self.interpret_periodical(tbs_type, byts, dat['geom'][0]) + except: + import traceback + traceback.print_exc() + a = [] + print ('Failed to decode TBS bytes for record: %d'%r.idx) ans += a if byts: sbyts = tuple(hex(b)[2:] for b in byts) From 8721d5aa233ae6004e74308541e425f300d1ec4f Mon Sep 17 00:00:00 2001 
From: Kovid Goyal Date: Sun, 31 Jul 2011 15:16:40 -0600 Subject: [PATCH 3/6] Fix decoding of trailing data entries --- src/calibre/ebooks/mobi/debug.py | 2 +- src/calibre/ebooks/mobi/utils.py | 29 ++++++++++++++++++----------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 91fc3d5e0f..8d2f3ba41e 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -1377,7 +1377,7 @@ class MOBIFile(object): # {{{ self.index_header, self.cncx) self.indexing_record_nums = set(xrange(pir, pir+2+self.index_header.num_of_cncx_blocks)) - self.secondary_index_record = self.secondary_index_record = None + self.secondary_index_record = self.secondary_index_header = None sir = self.mobi_header.secondary_index_record if sir != 0xffffffff: self.secondary_index_header = SecondaryIndexHeader(self.records[sir]) diff --git a/src/calibre/ebooks/mobi/utils.py b/src/calibre/ebooks/mobi/utils.py index 4298276bc1..e16871fec4 100644 --- a/src/calibre/ebooks/mobi/utils.py +++ b/src/calibre/ebooks/mobi/utils.py @@ -169,19 +169,26 @@ def get_trailing_data(record, extra_data_flags): :return: Trailing data, record - trailing data ''' data = OrderedDict() - for i in xrange(16, -1, -1): - flag = 1 << i # 2**i - if flag & extra_data_flags: - if i == 0: - # Only the first two bits are used for the size since there can - # never be more than 3 trailing multibyte chars - sz = (ord(record[-1]) & 0b11) + 1 - consumed = 1 - else: - sz, consumed = decint(record, forward=False) + flags = extra_data_flags >> 1 + + num = 0 + while flags: + num += 1 + if flags & 0b1: + sz, consumed = decint(record, forward=False) if sz > consumed: - data[i] = record[-sz:-consumed] + data[num] = record[-sz:-consumed] record = record[:-sz] + flags >>= 1 + # Read multibyte chars if any + if extra_data_flags & 0b1: + # Only the first two bits are used for the size since there can + # never be more than 3 trailing 
multibyte chars + sz = (ord(record[-1]) & 0b11) + 1 + consumed = 1 + if sz > consumed: + data[0] = record[-sz:-consumed] + record = record[:-sz] return data, record def encode_trailing_data(raw): From 7e19e0f75f35cfba9121ee9d008ad9824aae770c Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 31 Jul 2011 16:10:17 -0600 Subject: [PATCH 4/6] ... --- src/calibre/ebooks/mobi/writer.py | 3 +++ src/calibre/ebooks/mobi/writer2/main.py | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index c9d931c918..55e3ad0b03 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -430,6 +430,7 @@ class MobiWriter(object): text.seek(npos) return data, overlap + # TBS {{{ def _generate_flat_indexed_navpoints(self): # Assemble a HTMLRecordData instance for each HTML record # Return True if valid, False if invalid @@ -1174,6 +1175,8 @@ class MobiWriter(object): self._tbSequence = tbSequence + # }}} + def _evaluate_periodical_toc(self): ''' Periodical: diff --git a/src/calibre/ebooks/mobi/writer2/main.py b/src/calibre/ebooks/mobi/writer2/main.py index e232e626ce..ab24b197d3 100644 --- a/src/calibre/ebooks/mobi/writer2/main.py +++ b/src/calibre/ebooks/mobi/writer2/main.py @@ -85,9 +85,11 @@ class MobiWriter(object): self.is_periodical = detect_periodical(self.oeb.toc, self.oeb.log) self.generate_images() self.generate_text() + # The uncrossable breaks trailing entries come before the indexing + # trailing entries + self.write_uncrossable_breaks() # Index records come after text records self.generate_index() - self.write_uncrossable_breaks() # Indexing {{{ def generate_index(self): From c84b67d139cedd51b73832d13376e372a43f5f08 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 31 Jul 2011 16:39:15 -0600 Subject: [PATCH 5/6] ... 
--- src/calibre/ebooks/mobi/debug.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/calibre/ebooks/mobi/debug.py b/src/calibre/ebooks/mobi/debug.py index 8d2f3ba41e..aff8543624 100644 --- a/src/calibre/ebooks/mobi/debug.py +++ b/src/calibre/ebooks/mobi/debug.py @@ -1089,6 +1089,11 @@ class TextRecord(object): # {{{ self.trailing_data['uncrossable_breaks'] = self.trailing_data.pop(2) self.trailing_data['raw_bytes'] = raw_trailing_bytes + for typ, val in self.trailing_data.iteritems(): + if isinstance(typ, int): + print ('Record %d has unknown trailing data of type: %d : %r'% + (idx, typ, val)) + self.idx = idx def dump(self, folder): From cd2fd58afc7432c09992abd007a9e221fc5107cf Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Sun, 31 Jul 2011 18:55:14 -0600 Subject: [PATCH 6/6] Fix #819076 (Misspelling of Android in Welcome Wizard) --- src/calibre/translations/eu.po | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/translations/eu.po b/src/calibre/translations/eu.po index e8df74d5bb..4c37cfeca4 100644 --- a/src/calibre/translations/eu.po +++ b/src/calibre/translations/eu.po @@ -951,7 +951,7 @@ msgstr "Araztu saioa" #: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:13 msgid "Communicate with Android phones." -msgstr "Adroid telefonoekin komunikatu." +msgstr "Android telefonoekin komunikatu." #: /home/kovid/work/calibre/src/calibre/devices/android/driver.py:113 msgid ""