From 644335d97b1494ee04c3c657d435a3aeef44551c Mon Sep 17 00:00:00 2001 From: John Schember Date: Sat, 16 Apr 2011 21:23:13 -0400 Subject: [PATCH] Ignore non internal links. Support composite images. --- src/calibre/ebooks/pdb/plucker/reader.py | 109 ++++++++++++++++++++--- 1 file changed, 99 insertions(+), 10 deletions(-) diff --git a/src/calibre/ebooks/pdb/plucker/reader.py b/src/calibre/ebooks/pdb/plucker/reader.py index 171c051bbd..c6c404b125 100644 --- a/src/calibre/ebooks/pdb/plucker/reader.py +++ b/src/calibre/ebooks/pdb/plucker/reader.py @@ -13,10 +13,9 @@ import zlib from collections import OrderedDict from calibre import CurrentDir -from calibre.ebooks.metadata.opf2 import OPFCreator from calibre.ebooks.pdb.formatreader import FormatReader from calibre.ptempfile import TemporaryFile -from calibre.utils.magick import Image +from calibre.utils.magick import Image, create_canvas DATATYPE_PHTML = 0 DATATYPE_PHTML_COMPRESSED = 1 @@ -178,6 +177,7 @@ class SectionHeaderText(object): self.sizes.append(struct.unpack('>H', raw[adv:2+adv])[0]) self.attributes.append(struct.unpack('>H', raw[2+adv:4+adv])[0]) + class SectionMetadata(object): def __init__(self, raw): @@ -220,6 +220,7 @@ class SectionMetadata(object): adv += 2*length + class SectionText(object): def __init__(self, section_header, raw): @@ -227,6 +228,34 @@ class SectionText(object): self.data = raw[section_header.paragraphs * 4:] +class SectionCompositeImage(object): + + def __init__(self, raw): + self.columns, = struct.unpack('>H', raw[0:2]) + self.rows, = struct.unpack('>H', raw[2:4]) + + # [ + # row [col, col, col...], + # row [col, col, col...], + # ... + # ] + # + # Each item in the layout is in it's + # correct position in the final + # composite. + # + # Each item in the layout is a uid + # to an image record. + self.layout = [] + offset = 4 + for i in xrange(self.rows): + col = [] + for j in xrange(self.columns): + col.append(struct.unpack('>H', raw[offset:offset+2])[0]) + offset += 2 + self.layout.append(col) + + class Reader(FormatReader): def __init__(self, header, stream, log, options): @@ -240,6 +269,7 @@ class Reader(FormatReader): self.uid_text_secion_number = OrderedDict() self.uid_text_secion_encoding = {} self.uid_image_section_number = {} + self.uid_composite_image_section_number = {} self.metadata_section_number = None self.default_encoding = 'utf-8' self.owner_id = None @@ -266,8 +296,9 @@ class Reader(FormatReader): elif section_header.type == DATATYPE_METADATA: self.metadata_section_number = section_number section = SectionMetadata(raw_data[start:]) - #elif section_header.type == DATATYPE_COMPOSITE_IMAGE: - + elif section_header.type == DATATYPE_COMPOSITE_IMAGE: + self.uid_composite_image_section_number[section_header.uid] = section_number + section = SectionCompositeImage(raw_data[start:]) self.sections.append((section_header, section)) @@ -282,6 +313,9 @@ class Reader(FormatReader): self.mi = get_metadata(stream, False) def extract_content(self, output_dir): + # Each text record is independent (unless the continuation + # value is set in the previous record). Put each converted + # text recored into a separate file. with CurrentDir(output_dir): for uid, num in self.uid_text_secion_number.items(): self.log.debug(_('Writing record with uid: %s as %s.html' % (uid, uid))) @@ -297,9 +331,11 @@ class Reader(FormatReader): htmlf.write(html.encode('utf-8')) images = [] + image_sizes = {} if not os.path.exists(os.path.join(output_dir, 'images/')): os.makedirs(os.path.join(output_dir, 'images/')) with CurrentDir(os.path.join(output_dir, 'images/')): + # Single images. for uid, num in self.uid_image_section_number.items(): section_header, section_data = self.sections[num] if section_data: @@ -317,6 +353,7 @@ class Reader(FormatReader): itf.write(idata) im = Image() im.read(itn) + image_sizes[uid] = im.size im.set_compression_quality(70) im.save('%s.jpg' % uid) self.log.debug('Wrote image with uid %s to images/%s.jpg' % (uid, uid)) @@ -325,6 +362,49 @@ class Reader(FormatReader): images.append('%s.jpg' % uid) else: self.log.error('Failed to write image with uid %s: No data.' % uid) + # Composite images. + for uid, num in self.uid_composite_image_section_number.items(): + try: + section_header, section_data = self.sections[num] + # Get the final width and height. + width = 0 + height = 0 + for row in section_data.layout: + row_width = 0 + col_height = 0 + for col in row: + if col not in image_sizes: + raise Exception('Image with uid: %s missing.' % col) + im = Image() + im.read('%s.jpg' % col) + w, h = im.size + row_width += w + if col_height < h: + col_height = h + if width < row_width: + width = row_width + height += col_height + # Create a new image the total size of all image + # parts. Put the parts into the new image. + canvas = create_canvas(width, height) + y_off = 0 + for row in section_data.layout: + x_off = 0 + largest_height = 0 + for col in row: + im = Image() + im.read('%s.jpg' % col) + canvas.compose(im, x_off, y_off) + w, h = im.size + x_off += w + if largest_height < h: + largest_height = h + y_off += largest_height + canvas.set_compression_quality(70) + canvas.save('%s.jpg' % uid) + self.log.debug('Wrote composite image with uid %s to images/%s.jpg' % (uid, uid)) + except Exception as e: + self.log.error('Failed to write composite image with uid %s: %s' % (uid, e)) # Run the HTML through the html processing plugin. from calibre.customize.ui import plugin_for_input_format @@ -334,13 +414,17 @@ class Reader(FormatReader): self.options.input_encoding = 'utf-8' odi = self.options.debug_pipeline self.options.debug_pipeline = None - # Generate oeb from html conversion. + # Determine the home.html record uid. This should be set in the + # reserved values in the metadata recored. home.html is the first + # text record (should have hyper link references to other records) + # in the document. try: home_html = self.header_record.home_html if not home_html: home_html = self.uid_text_secion_number.items()[0][0] except: raise Exception(_('Could not determine home.html')) + # Generate oeb from html conversion. oeb = html_input.convert(open('%s.html' % home_html, 'rb'), self.options, 'html', self.log, {}) self.options.debug_pipeline = odi @@ -359,6 +443,7 @@ class Reader(FormatReader): html = u'

' offset = 0 paragraph_open = True + link_open = False need_set_p_id = False p_num = 1 paragraph_offsets = [] @@ -387,14 +472,15 @@ class Reader(FormatReader): if c == 0x0a: offset += 1 id = struct.unpack('>H', d[offset:offset+2])[0] - html += '' % id + if id in self.uid_text_secion_number: + html += '' % id + link_open = True offset += 1 # Targeted page link begins # 3 Bytes # record ID, target elif c == 0x0b: offset += 3 - html += '' # Paragraph link begins # 4 Bytes # record ID, paragraph number @@ -403,18 +489,21 @@ class Reader(FormatReader): id = struct.unpack('>H', d[offset:offset+2])[0] offset += 2 pid = struct.unpack('>H', d[offset:offset+2])[0] - html += '' % (id, pid) + if id in self.uid_text_secion_number: + html += '' % (id, pid) + link_open = True offset += 1 # Targeted paragraph link begins # 5 Bytes # record ID, paragraph number, target elif c == 0x0d: offset += 5 - html += '' # Link ends # 0 Bytes elif c == 0x08: - html += '' + if link_open: + html += '' + link_open = False # Set font # 1 Bytes # font specifier