From 8b7d3cf4cccfc72f0fbb32d5bc313bcabd4e966d Mon Sep 17 00:00:00 2001 From: "Marshall T. Vandegrift" Date: Wed, 7 Jan 2009 00:35:58 -0500 Subject: [PATCH] More fixes and improvements. Etc etc etc. --- src/calibre/ebooks/mobi/mobiml.py | 13 ++- src/calibre/ebooks/mobi/writer.py | 84 +++++++++++-------- src/calibre/ebooks/oeb/transforms/flatcss.py | 23 +++-- .../ebooks/oeb/transforms/rasterize.py | 2 +- 4 files changed, 78 insertions(+), 44 deletions(-) diff --git a/src/calibre/ebooks/mobi/mobiml.py b/src/calibre/ebooks/mobi/mobiml.py index 1fac55648b..199566271b 100644 --- a/src/calibre/ebooks/mobi/mobiml.py +++ b/src/calibre/ebooks/mobi/mobiml.py @@ -40,6 +40,7 @@ class BlockState(object): self.nested = [] self.para = None self.inline = None + self.anchor = None self.vpadding = 0. self.vmargin = 0. self.pbreak = False @@ -136,6 +137,7 @@ class MobiMLizer(object): etree.SubElement(body, 'a', attrib={'id': id}) istate.ids.clear() bstate.istate = None + bstate.anchor = None parent = bstate.nested[-1] if bstate.nested else bstate.body indent = istate.indent left = istate.left @@ -190,6 +192,13 @@ class MobiMLizer(object): valign = istate.valign fsize = istate.fsize href = istate.href + if not href: + bstate.anchor = None + elif pstate and pstate.href == href: + inline = bstate.anchor + else: + inline = etree.SubElement(inline, 'a', href=href) + bstate.anchor = inline if valign == 'super': inline = etree.SubElement(inline, 'sup') elif valign == 'sub': @@ -202,8 +211,6 @@ class MobiMLizer(object): inline = etree.SubElement(inline, 'i') if istate.bold: inline = etree.SubElement(inline, 'b') - if href: - inline = etree.SubElement(inline, 'a', href=href) bstate.inline = inline bstate.istate = istate inline = bstate.inline @@ -254,7 +261,7 @@ class MobiMLizer(object): bstate.vpadding += bstate.vmargin bstate.vmargin = 0 bstate.vpadding += vpadding - else: + elif not istate.href: margin = asfloat(style['margin-left']) padding = asfloat(style['padding-left']) lspace = margin + padding diff --git a/src/calibre/ebooks/mobi/writer.py b/src/calibre/ebooks/mobi/writer.py index 34ffe77e75..ed732b200d 100644 --- a/src/calibre/ebooks/mobi/writer.py +++ b/src/calibre/ebooks/mobi/writer.py @@ -30,6 +30,13 @@ from calibre.ebooks.mobi.palmdoc import compress_doc from calibre.ebooks.mobi.langcodes import iana2mobi from calibre.ebooks.mobi.mobiml import MBP_NS, MBP, MobiMLizer +# TODO: +# - Image scaling +# - Clean unused files +# - Override CSS +# - Generate in-content ToC +# - Command line options, etc. + EXTH_CODES = { 'creator': 100, 'publisher': 101, @@ -51,6 +58,10 @@ UNCOMPRESSED = 1 PALMDOC = 2 HUFFDIC = 17480 +MAX_IMAGE_SIZE = 63 * 1024 +MAX_THUMB_SIZE = 16 * 1024 +MAX_THUMB_DIMEN = (180, 240) + def encode(data): return data.encode('utf-8') @@ -265,28 +276,26 @@ class MobiWriter(object): size = len(last) + 1 text.seek(npos - size) last = text.read(size) + extra = 0 try: last.decode('utf-8') except UnicodeDecodeError: - pass - else: - text.seek(pos) - return text.read(RECORD_SIZE) - prev = len(last) - while True: - text.seek(npos - prev) - last = text.read(len(last) + 1) - try: - last.decode('utf-8') - except UnicodeDecodeError: - pass - else: - break - extra = len(last) - prev + prev = len(last) + while True: + text.seek(npos - prev) + last = text.read(len(last) + 1) + try: + last.decode('utf-8') + except UnicodeDecodeError: + pass + else: + break + extra = len(last) - prev text.seek(pos) - data = text.read(RECORD_SIZE + extra) + data = text.read(RECORD_SIZE) + overlap = text.read(extra) text.seek(npos) - return data + return data, overlap def _generate_text(self): serializer = Serializer(self._oeb, self._images) @@ -296,14 +305,14 @@ class MobiWriter(object): text = StringIO(text) nrecords = 0 offset = 0 - data = self._read_text_record(text) + data, overlap = self._read_text_record(text) while len(data) > 0: - size = len(data) if self._compression == PALMDOC: data = compress_doc(data) record = StringIO() record.write(data) - record.write(pack('>B', max((0, size - RECORD_SIZE)))) + record.write(overlap) + record.write(pack('>B', len(overlap))) nextra = 0 pbreak = 0 running = offset @@ -317,7 +326,7 @@ class MobiWriter(object): self._records.append(record.getvalue()) nrecords += 1 offset += RECORD_SIZE - data = self._read_text_record(text) + data, overlap = self._read_text_record(text) self._text_nrecords = nrecords def _rescale_image(self, data, maxsizeb, dimen=None): @@ -334,7 +343,7 @@ class MobiWriter(object): data = StringIO() image.save(data, format) data = data.getvalue() - if len(data) < maxsizeb: + if len(data) <= maxsizeb: return data image = image.convert('RGBA') for quality in xrange(95, -1, -1): @@ -342,7 +351,19 @@ class MobiWriter(object): image.save(data, 'JPEG', quality=quality) data = data.getvalue() if len(data) <= maxsizeb: - break + return data + width, height = image.size + for scale in xrange(99, 0, -1): + scale = scale / 100. + data = StringIO() + scaled = image.copy() + size = (int(width * scale), (height * scale)) + scaled.thumbnail(size, Image.ANTIALIAS) + scaled.save(data, 'JPEG', quality=0) + data = data.getvalue() + if len(data) <= maxsizeb: + return data + # Well, we tried? return data def _generate_images(self): @@ -352,9 +373,7 @@ class MobiWriter(object): coverid = metadata.cover[0] if metadata.cover else None for _, href in images: item = self._oeb.manifest.hrefs[href] - maxsizek = 89 if coverid == item.id else 63 - maxsizeb = maxsizek * 1024 - data = self._rescale_image(item.data, maxsizeb) + data = self._rescale_image(item.data, MAX_IMAGE_SIZE) self._records.append(data) def _generate_record0(self): @@ -398,7 +417,7 @@ class MobiWriter(object): if term not in EXTH_CODES: continue code = EXTH_CODES[term] for item in oeb.metadata[term]: - data = str(item) + data = unicode(item).encode('utf-8') exth.write(pack('>II', code, len(data) + 8)) exth.write(data) nrecs += 1 @@ -419,9 +438,7 @@ class MobiWriter(object): return ''.join(exth) def _add_thumbnail(self, item): - maxsizeb = 16 * 1024 - dimen = (180, 240) - data = self._rescale_image(item.data, maxsizeb, dimen) + data = self._rescale_image(item.data, MAX_THUMB_SIZE, MAX_THUMB_DIMEN) manifest = self._oeb.manifest id, href = manifest.generate('thumbnail', 'thumbnail.jpeg') manifest.add(id, href, 'image/jpeg', data=data) @@ -459,12 +476,13 @@ def main(argv=sys.argv): #writer = DirWriter() fbase = context.dest.fbase fkey = context.dest.fnums.values() - flattener = CSSFlattener(unfloat=True, fbase=fbase, fkey=fkey) + flattener = CSSFlattener(fbase=fbase, fkey=fkey, unfloat=True, + untable=True) rasterizer = SVGRasterizer() mobimlizer = MobiMLizer() - flattener.transform(oeb, context) + #flattener.transform(oeb, context) rasterizer.transform(oeb, context) - mobimlizer.transform(oeb, context) + #mobimlizer.transform(oeb, context) writer.dump(oeb, outpath) return 0 diff --git a/src/calibre/ebooks/oeb/transforms/flatcss.py b/src/calibre/ebooks/oeb/transforms/flatcss.py index 67f9a7c868..86ae61d314 100644 --- a/src/calibre/ebooks/oeb/transforms/flatcss.py +++ b/src/calibre/ebooks/oeb/transforms/flatcss.py @@ -79,11 +79,13 @@ def FontMapper(sbase=None, dbase=None, dkey=None): class CSSFlattener(object): - def __init__(self, unfloat=False, fbase=None, fkey=None, lineh=None): - self.unfloat = unfloat + def __init__(self, fbase=None, fkey=None, lineh=None, unfloat=False, + untable=False): self.fbase = fbase self.fkey = fkey self.lineh = lineh + self.unfloat = unfloat + self.untable = untable def transform(self, oeb, context): self.oeb = oeb @@ -180,12 +182,19 @@ class CSSFlattener(object): cssdict['margin-left'] = "%d%%" % (percent * 100) left -= style['text-indent'] if self.unfloat and 'float' in cssdict \ - and tag not in ('img', 'object'): - if cssdict.get('display', 'none') != 'none': + and tag not in ('img', 'object') \ + and cssdict.get('display', 'none') != 'none': del cssdict['display'] - if 'vertical-align' in cssdict: - if cssdict['vertical-align'] == 'sup': - cssdict['vertical-align'] = 'super' + if self.untable and 'display' in cssdict \ + and cssdict['display'].startswith('table'): + display = cssdict['display'] + if display == 'table-cell': + cssdict['display'] = 'inline' + else: + cssdict['display'] = 'block' + if 'vertical-align' in cssdict \ + and cssdict['vertical-align'] == 'sup': + cssdict['vertical-align'] = 'super' if self.lineh and 'line-height' not in cssdict: lineh = self.lineh / psize cssdict['line-height'] = "%0.5fem" % lineh diff --git a/src/calibre/ebooks/oeb/transforms/rasterize.py b/src/calibre/ebooks/oeb/transforms/rasterize.py index 90d7a07579..b57736beb9 100644 --- a/src/calibre/ebooks/oeb/transforms/rasterize.py +++ b/src/calibre/ebooks/oeb/transforms/rasterize.py @@ -171,7 +171,7 @@ class SVGRasterizer(object): cover = self.oeb.manifest.ids[str(covers[0])] if not cover.media_type == SVG_MIME: return - data = self.rasterize_svg(cover.data, 600, 800) + data = self.rasterize_svg(cover.data, 500, 800) href = os.path.splitext(cover.href)[0] + '.png' id, href = self.oeb.manifest.generate(cover.id, href) self.oeb.manifest.add(id, href, PNG_MIME, data=data)