From 1f7c291aeb3bae171e5bff44d0ea47ba3fdd5524 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 20:37:16 -0500 Subject: [PATCH 1/6] FB2 Output: Use random uuid for book id. --- src/calibre/ebooks/fb2/fb2ml.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index d83dc45a0f..d89570a44e 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -12,6 +12,7 @@ import cStringIO from base64 import b64encode from datetime import datetime import re +import uuid try: from PIL import Image @@ -88,7 +89,8 @@ class FB2MLizer(object): metadata['appname'] = __appname__ metadata['version'] = __version__ metadata['date'] = '%i.%i.%i' % (datetime.now().day, datetime.now().month, datetime.now().year) - metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en' + metadata['lang'] = u''.join(self.oeb_book.metadata.lang) if self.oeb_book.metadata.lang else 'en' + metadata['id'] = '%s' % uuid.uuid4() author_parts = self.oeb_book.metadata.creator[0].value.split(' ') if len(author_parts) == 1: @@ -124,7 +126,7 @@ class FB2MLizer(object): '' \ '%(appname)s %(version)s' \ '%(date)s' \ - '1' \ + '%(id)s' \ '1.0' \ '' \ '' % metadata From 0ea35abaf189ded4ba2e39a6201c545c21e8290e Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 22:04:37 -0500 Subject: [PATCH 2/6] FB2 Output: Check image is in document and manifest before referencing and writing. 
--- src/calibre/ebooks/fb2/fb2ml.py | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index d89570a44e..3020c002a5 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -145,8 +145,14 @@ class FB2MLizer(object): return ''.join(text) + '' def fb2mlize_images(self): + ''' + This function uses the self.image_hrefs dictionary mapping. It is populated by the dump_text function. + ''' images = [] for item in self.oeb_book.manifest: + # Don't write the image if it's not referenced in the document's text. + if item.href not in self.image_hrefs: + continue if item.media_type in OEB_RASTER_IMAGES: try: im = Image.open(cStringIO.StringIO(item.data)).convert('RGB') @@ -164,7 +170,7 @@ class FB2MLizer(object): col = 1 col += 1 data += char - images.append('<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs.get(item.href, '_0000.JPEG'), item.media_type, data)) + images.append('<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs[item.href], item.media_type, data)) except Exception as e: self.log.error('Error: Could not include file %s because ' \ '%s.' % (item.href, e)) @@ -245,14 +251,15 @@ class FB2MLizer(object): fb2_out.append('') tags.append('title') if tag == 'img': - # TODO: Check that the image is in the manifest and only write the tag if it is. if elem_tree.attrib.get('src', None): - if page.abshref(elem_tree.attrib['src']) not in self.image_hrefs.keys(): - self.image_hrefs[page.abshref(elem_tree.attrib['src'])] = '_%s.jpg' % len(self.image_hrefs.keys()) - p_txt, p_tag = self.ensure_p() - fb2_out += p_txt - tags += p_tag - fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])]) + # Only write the image tag if it is in the manifest. 
+ if page.abshref(elem_tree.attrib['src']) in self.oeb_book.manifest.hrefs.keys(): + if page.abshref(elem_tree.attrib['src']) not in self.image_hrefs.keys(): + self.image_hrefs[page.abshref(elem_tree.attrib['src'])] = '_%s.jpg' % len(self.image_hrefs.keys()) + p_txt, p_tag = self.ensure_p() + fb2_out += p_txt + tags += p_tag + fb2_out.append('<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem_tree.attrib['src'])]) elif tag == 'br': if self.in_p: closed_tags = [] From 596c8b905bd7280093cc84eb57bef674326a63a0 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 22:14:30 -0500 Subject: [PATCH 3/6] FB2 Output: SVG rasterization. --- src/calibre/ebooks/fb2/output.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/calibre/ebooks/fb2/output.py b/src/calibre/ebooks/fb2/output.py index 88508b83e0..33714c6e6e 100644 --- a/src/calibre/ebooks/fb2/output.py +++ b/src/calibre/ebooks/fb2/output.py @@ -29,6 +29,14 @@ class FB2Output(OutputFormatPlugin): def convert(self, oeb_book, output_path, input_plugin, opts, log): from calibre.ebooks.oeb.transforms.jacket import linearize_jacket + from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable + + try: + rasterizer = SVGRasterizer() + rasterizer(oeb_book, opts) + except Unavailable: + self.log.warn('SVG rasterizer unavailable, SVG will not be converted') + linearize_jacket(oeb_book) fb2mlizer = FB2MLizer(log) From f7d9571c4c51f8640841f3dfd45af7a71d3fff12 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 22:37:34 -0500 Subject: [PATCH 4/6] FB2 Output: Replace PIL with ImageMagick. Don't convert JPG images to JPG because it's unnecessary. 
--- src/calibre/ebooks/fb2/fb2ml.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 3020c002a5..b04cb50d46 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -8,18 +8,12 @@ __docformat__ = 'restructuredtext en' Transform OEB content into FB2 markup ''' -import cStringIO from base64 import b64encode from datetime import datetime +from mimetypes import types_map import re import uuid -try: - from PIL import Image - Image -except ImportError: - import Image - from lxml import etree from calibre import prepare_string_for_xml @@ -27,6 +21,7 @@ from calibre.constants import __appname__, __version__ from calibre.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace from calibre.ebooks.oeb.stylizer import Stylizer from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES +from calibre.utils.magick import Image class FB2MLizer(object): ''' @@ -155,11 +150,11 @@ class FB2MLizer(object): continue if item.media_type in OEB_RASTER_IMAGES: try: - im = Image.open(cStringIO.StringIO(item.data)).convert('RGB') - data = cStringIO.StringIO() - im.save(data, 'JPEG') - data = data.getvalue() - + if not item.media_type == types_map['.jpeg'] or not item.media_type == types_map['.jpg']: + im = Image() + im.load(item.data) + im.set_compression_quality(70) + data = im.export('jpg') raw_data = b64encode(data) # Don't put the encoded image on a single line. data = '' From 4d3e99af6733a7ed7bba89297971b8847d5483d9 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 22:59:37 -0500 Subject: [PATCH 5/6] FB2 Output: Fix writing incorrect mimetype. 
--- src/calibre/ebooks/fb2/fb2ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 479cd4d789..0748970c60 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -165,7 +165,7 @@ class FB2MLizer(object): col = 1 col += 1 data += char - images.append('<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs[item.href], item.media_type, data)) + images.append('<binary id="%s">%s\n</binary>' % (self.image_hrefs[item.href], data)) except Exception as e: self.log.error('Error: Could not include file %s because ' \ '%s.' % (item.href, e)) From fcd87f216c6172443f3245b1ad117435aca516c9 Mon Sep 17 00:00:00 2001 From: John Schember <john@nachtimwald.com> Date: Sat, 4 Dec 2010 23:02:55 -0500 Subject: [PATCH 6/6] FB2 Output: Add image mimetype back and set to jpg because that is what is written. --- src/calibre/ebooks/fb2/fb2ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py index 0748970c60..46861357e6 100644 --- a/src/calibre/ebooks/fb2/fb2ml.py +++ b/src/calibre/ebooks/fb2/fb2ml.py @@ -165,7 +165,7 @@ class FB2MLizer(object): col = 1 col += 1 data += char - images.append('<binary id="%s">%s\n</binary>' % (self.image_hrefs[item.href], data)) + images.append('<binary id="%s" content-type="image/jpeg">%s\n</binary>' % (self.image_hrefs[item.href], data)) except Exception as e: self.log.error('Error: Could not include file %s because ' \ '%s.' % (item.href, e))