FB2 output: re-encode embedded images as JPEG and reference them by generated ids.

What the patch does
  * FB2MLizer keeps an image_hrefs dict mapping the href returned by
    page.abshref() to a generated id ("0.jpg", "1.jpg", ...). The dict is
    reset at the start of fb2mlize_spine().
  * dump_text() takes the manifest item being converted as a new `page`
    argument so that img/@src can be resolved through page.abshref().
  * fb2mlize_images() re-encodes every raster image as JPEG before it is
    base64-encoded; a failure on one image is logged and that image skipped.

Review changes folded into this revision
  * Images are converted to RGB before saving (the JPEG writer rejects
    palette and alpha modes, which previously hit the except branch).
  * The binary content-type is written as image/jpeg, matching the payload,
    instead of echoing the source media type.
  * Typo in the log message fixed; the href is resolved once per <img>.

NOTE(review): this file was recovered from a flattened copy. Line breaks,
indentation, blank context lines and the XML literals inside the format
strings were reconstructed - verify against the target tree before applying.

diff --git a/src/calibre/ebooks/fb2/fb2ml.py b/src/calibre/ebooks/fb2/fb2ml.py
index 357bce0b22..c77cb876f8 100644
--- a/src/calibre/ebooks/fb2/fb2ml.py
+++ b/src/calibre/ebooks/fb2/fb2ml.py
@@ -9,8 +9,15 @@ Transform OEB content into FB2 markup
 '''
 
 import os
+import cStringIO
 from base64 import b64encode
 
+try:
+    from PIL import Image
+    Image  # bare reference; presumably keeps linters from flagging the import
+except ImportError:
+    import Image
+
 from lxml import etree
 
 from calibre import prepare_string_for_xml
@@ -37,8 +44,11 @@ STYLES = [
 ]
 
 class FB2MLizer(object):
+
     def __init__(self, log):
         self.log = log
+        # Image id table: href from page.abshref() -> id used in the FB2 output.
+        self.image_hrefs = {}
 
     def extract_content(self, oeb_book, opts):
         self.log.info('Converting XHTML to FB2 markup...')
@@ -47,6 +57,7 @@ class FB2MLizer(object):
         return self.fb2mlize_spine()
 
     def fb2mlize_spine(self):
+        self.image_hrefs = {}  # ids are handed out afresh on every call
         output = self.fb2_header()
         if 'titlepage' in self.oeb_book.guide:
             self.log.debug('Generating cover page...')
@@ -54,11 +65,11 @@ class FB2MLizer(object):
             item = self.oeb_book.manifest.hrefs[href]
             if item.spine_position is None:
                 stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
-                output += self.dump_text(item.data.find(XHTML('body')), stylizer)
+                output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
         for item in self.oeb_book.spine:
             self.log.debug('Converting %s to FictionBook2 XML' % item.href)
             stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
-            output += self.dump_text(item.data.find(XHTML('body')), stylizer)
+            output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
         output += self.fb2_body_footer()
         output += self.fb2mlize_images()
         output += self.fb2_footer()
@@ -102,20 +113,33 @@ class FB2MLizer(object):
         images = u''
         for item in self.oeb_book.manifest:
             if item.media_type in OEB_RASTER_IMAGES:
-                raw_data = b64encode(item.data)
-                # Don't put the encoded image on a single line.
-                data = ''
-                col = 1
-                for char in raw_data:
-                    if col == 72:
-                        data += '\n'
-                        col = 1
-                    col += 1
-                    data += char
-                images += '<binary id="%s" content-type="%s">%s\n</binary>' % (os.path.basename(item.href), item.media_type, data)
+                try:
+                    # Every image is re-encoded as JPEG. The JPEG writer
+                    # rejects palette and alpha modes, so normalise to RGB.
+                    im = Image.open(cStringIO.StringIO(item.data)).convert('RGB')
+                    data = cStringIO.StringIO()
+                    im.save(data, 'JPEG')
+                    data = data.getvalue()
+
+                    raw_data = b64encode(data)
+                    # Don't put the encoded image on a single line.
+                    data = ''
+                    col = 1
+                    for char in raw_data:
+                        if col == 72:
+                            data += '\n'
+                            col = 1
+                        col += 1
+                        data += char
+                    # The payload is always JPEG at this point, so declare it
+                    # as such rather than echoing the source media type.
+                    images += '<binary id="%s" content-type="image/jpeg">%s\n</binary>' % (self.image_hrefs.get(item.href, '0000.JPEG'), data)
+                except Exception as e:
+                    self.log.error('Error: Could not include file %s because ' \
+                        '%s.' % (item.href, e))
         return images
 
-    def dump_text(self, elem, stylizer, tag_stack=[]):
+    def dump_text(self, elem, stylizer, page, tag_stack=[]):
         if not isinstance(elem.tag, basestring) \
            or namespace(elem.tag) != XHTML_NS:
             return u''
@@ -131,7 +155,13 @@ class FB2MLizer(object):
         tag_count = 0
 
         if tag == 'img':
-            fb2_text += '<image xlink:href="#%s" />' % os.path.basename(elem.attrib['src'])
+            # Ids are keyed on the href from page.abshref() and numbered in
+            # order of first use, so two files sharing a basename stay distinct.
+            src = page.abshref(elem.attrib['src'])
+            if src not in self.image_hrefs:
+                self.image_hrefs[src] = '%s.jpg' % len(self.image_hrefs)
+            fb2_text += '<image xlink:href="#%s" />' % self.image_hrefs[src]
+
 
         fb2_tag = TAG_MAP.get(tag, None)
         if fb2_tag and fb2_tag not in tag_stack:
@@ -155,7 +185,7 @@ class FB2MLizer(object):
             fb2_text += prepare_string_for_xml(elem.text)
 
         for item in elem:
-            fb2_text += self.dump_text(item, stylizer, tag_stack)
+            fb2_text += self.dump_text(item, stylizer, page, tag_stack)
 
         close_tag_list = []
         for i in range(0, tag_count):