Fix bug #3104: FB2 output now handles images with the same name in different directories.

This commit is contained in:
John Schember 2009-08-08 11:22:13 -04:00
parent fdac643e72
commit 3b23ff4518

View File

@ -9,8 +9,15 @@ Transform OEB content into FB2 markup
''' '''
import os import os
import cStringIO
from base64 import b64encode from base64 import b64encode
try:
from PIL import Image
Image
except ImportError:
import Image
from lxml import etree from lxml import etree
from calibre import prepare_string_for_xml from calibre import prepare_string_for_xml
@ -37,8 +44,10 @@ STYLES = [
] ]
class FB2MLizer(object): class FB2MLizer(object):
def __init__(self, log): def __init__(self, log):
self.log = log self.log = log
self.image_hrefs = {}
def extract_content(self, oeb_book, opts): def extract_content(self, oeb_book, opts):
self.log.info('Converting XHTML to FB2 markup...') self.log.info('Converting XHTML to FB2 markup...')
@ -47,6 +56,7 @@ class FB2MLizer(object):
return self.fb2mlize_spine() return self.fb2mlize_spine()
def fb2mlize_spine(self): def fb2mlize_spine(self):
self.image_hrefs = {}
output = self.fb2_header() output = self.fb2_header()
if 'titlepage' in self.oeb_book.guide: if 'titlepage' in self.oeb_book.guide:
self.log.debug('Generating cover page...') self.log.debug('Generating cover page...')
@ -54,11 +64,11 @@ class FB2MLizer(object):
item = self.oeb_book.manifest.hrefs[href] item = self.oeb_book.manifest.hrefs[href]
if item.spine_position is None: if item.spine_position is None:
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
for item in self.oeb_book.spine: for item in self.oeb_book.spine:
self.log.debug('Converting %s to FictionBook2 XML' % item.href) self.log.debug('Converting %s to FictionBook2 XML' % item.href)
stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile) stylizer = Stylizer(item.data, item.href, self.oeb_book, self.opts.output_profile)
output += self.dump_text(item.data.find(XHTML('body')), stylizer) output += self.dump_text(item.data.find(XHTML('body')), stylizer, item)
output += self.fb2_body_footer() output += self.fb2_body_footer()
output += self.fb2mlize_images() output += self.fb2mlize_images()
output += self.fb2_footer() output += self.fb2_footer()
@ -102,20 +112,29 @@ class FB2MLizer(object):
images = u'' images = u''
for item in self.oeb_book.manifest: for item in self.oeb_book.manifest:
if item.media_type in OEB_RASTER_IMAGES: if item.media_type in OEB_RASTER_IMAGES:
raw_data = b64encode(item.data) try:
# Don't put the encoded image on a single line. im = Image.open(cStringIO.StringIO(item.data))
data = '' data = cStringIO.StringIO()
col = 1 im.save(data, 'JPEG')
for char in raw_data: data = data.getvalue()
if col == 72:
data += '\n' raw_data = b64encode(data)
col = 1 # Don't put the encoded image on a single line.
col += 1 data = ''
data += char col = 1
images += '<binary id="%s" content-type="%s">%s\n</binary>' % (os.path.basename(item.href), item.media_type, data) for char in raw_data:
if col == 72:
data += '\n'
col = 1
col += 1
data += char
images += '<binary id="%s" content-type="%s">%s\n</binary>' % (self.image_hrefs.get(item.href, '0000.JPEG'), item.media_type, data)
except Exception as e:
self.log.error('Error: Could not include file %s becuase ' \
'%s.' % (item.href, e))
return images return images
def dump_text(self, elem, stylizer, tag_stack=[]): def dump_text(self, elem, stylizer, page, tag_stack=[]):
if not isinstance(elem.tag, basestring) \ if not isinstance(elem.tag, basestring) \
or namespace(elem.tag) != XHTML_NS: or namespace(elem.tag) != XHTML_NS:
return u'' return u''
@ -131,7 +150,10 @@ class FB2MLizer(object):
tag_count = 0 tag_count = 0
if tag == 'img': if tag == 'img':
fb2_text += '<image xlink:href="#%s" />' % os.path.basename(elem.attrib['src']) if page.abshref(elem.attrib['src']) not in self.image_hrefs.keys():
self.image_hrefs[page.abshref(elem.attrib['src'])] = '%s.jpg' % len(self.image_hrefs.keys())
fb2_text += '<image xlink:href="#%s" />' % self.image_hrefs[page.abshref(elem.attrib['src'])]
fb2_tag = TAG_MAP.get(tag, None) fb2_tag = TAG_MAP.get(tag, None)
if fb2_tag and fb2_tag not in tag_stack: if fb2_tag and fb2_tag not in tag_stack:
@ -155,7 +177,7 @@ class FB2MLizer(object):
fb2_text += prepare_string_for_xml(elem.text) fb2_text += prepare_string_for_xml(elem.text)
for item in elem: for item in elem:
fb2_text += self.dump_text(item, stylizer, tag_stack) fb2_text += self.dump_text(item, stylizer, page, tag_stack)
close_tag_list = [] close_tag_list = []
for i in range(0, tag_count): for i in range(0, tag_count):