From ba99c66fcdce7e68b5104557f00f07d58f21644f Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 29 Sep 2008 18:19:50 -0700 Subject: [PATCH] Handle EPUB files that have non image based covers. Fixes #1092 (EPUB Conversion Error) --- src/calibre/ebooks/epub/from_html.py | 132 ++++++++++++++++++--------- src/calibre/ebooks/metadata/opf2.py | 28 +++++- src/calibre/ptempfile.py | 6 +- 3 files changed, 119 insertions(+), 47 deletions(-) diff --git a/src/calibre/ebooks/epub/from_html.py b/src/calibre/ebooks/epub/from_html.py index 92bf18b32d..92fc8382ff 100644 --- a/src/calibre/ebooks/epub/from_html.py +++ b/src/calibre/ebooks/epub/from_html.py @@ -32,7 +32,8 @@ Conversion of HTML/OPF files follows several stages: * The EPUB container is created. ''' -import os, sys, re, cStringIO +import os, sys, re, cStringIO, logging +from contextlib import nested from lxml.etree import XPath try: @@ -118,6 +119,77 @@ def parse_content(filelist, opts, tdir): return resource_map, hp.htmlfile_map, toc +def resize_cover(im, opts): + width, height = im.size + dw, dh = (opts.profile.screen_size[0]-width)/float(width), (opts.profile.screen_size[1]-height)/float(height) + delta = min(dw, dh) + if delta > 0: + nwidth = int(width + delta*(width)) + nheight = int(height + delta*(height)) + im = im.resize((int(nwidth), int(nheight)), PILImage.ANTIALIAS).convert('RGB') + return im + +def process_title_page(mi, filelist, htmlfilemap, opts, tdir): + old_title_page = None + if mi.cover: + if os.path.samefile(filelist[0].path, mi.cover): + old_title_page = htmlfilemap[filelist[0].path] + + #logger = logging.getLogger('html2epub') + metadata_cover = mi.cover + if metadata_cover and not os.path.exists(metadata_cover): + metadata_cover = None + if metadata_cover is not None: + with open(metadata_cover, 'rb') as src: + try: + im = PILImage.open(src) + if opts.profile.screen_size is not None: + im = resize_cover(im, opts) + metadata_cover = im + except: + metadata_cover = None + + specified_cover = opts.cover + if specified_cover and not os.path.exists(specified_cover): + specified_cover = None + if specified_cover is not None: + with open(specified_cover, 'rb') as src: + try: + im = PILImage.open(src) + if opts.profile.screen_size is not None: + im = resize_cover(im, opts) + specified_cover = im + except: + specified_cover = None + + cover = metadata_cover if specified_cover is None or (opts.prefer_metadata_cover and metadata_cover is not None) else specified_cover + if hasattr(cover, 'save'): + cpath = '/'.join(('resources', '_cover_.jpg')) + cover_dest = os.path.join(tdir, 'content', *cpath.split('/')) + with open(cover_dest, 'wb') as f: + im.save(f, format='jpeg') + titlepage = '''\ + + + Cover + + + +
+ cover +
+ + + '''%cpath + tp = 'calibre_title_page.html' if old_title_page is None else old_title_page + tppath = os.path.join(tdir, 'content', tp) + with open(tppath, 'wb') as f: + f.write(titlepage) + return tp if old_title_page is None else None, True + + return None, old_title_page is not None + + def convert(htmlfile, opts, notification=None): htmlfile = os.path.abspath(htmlfile) if opts.output is None: @@ -143,49 +215,16 @@ def convert(htmlfile, opts, notification=None): if opts.keep_intermediate: print 'Intermediate files in', tdir resource_map, htmlfile_map, generated_toc = parse_content(filelist, opts, tdir) + logger = logging.getLogger('html2epub') resources = [os.path.join(tdir, 'content', f) for f in resource_map.values()] - cover_src = None - if mi.cover and os.access(mi.cover, os.R_OK): - cover_src = mi.cover - else: - mi.cover = None - if opts.cover is not None and not opts.prefer_metadata_cover: - cover_src = opts.cover - if cover_src is not None: - cover_dest = os.path.join(tdir, 'content', 'resources', '_cover_.jpg') - PILImage.open(cover_src).convert('RGB').save(cover_dest) - mi.cover = cover_dest - resources.append(cover_dest) - + title_page, has_title_page = process_title_page(mi, filelist, htmlfile_map, opts, tdir) spine = [htmlfile_map[f.path] for f in filelist] - if mi.cover: - cpath = '/'.join(('resources', os.path.basename(mi.cover))) - if opts.profile.screen_size is not None: - im = PILImage.open(os.path.join(tdir, 'content', *cpath.split('/'))) - width, height = im.size - dw, dh = (opts.profile.screen_size[0]-width)/float(width), (opts.profile.screen_size[1]-height)/float(height) - delta = min(dw, dh) - if delta > 0: - nwidth = int(width + delta*(width)) - nheight = int(height + delta*(height)) - im.resize((int(nwidth), int(nheight)), PILImage.ANTIALIAS).convert('RGB').save(os.path.join(tdir, 'content', *cpath.split('/'))) - cover = '''\ - - Cover Page - -
- cover -
- -'''%cpath - cpath = os.path.join(tdir, 'content', 'calibre_cover_page.html') - with open(cpath, 'wb') as f: - f.write(cover) - spine[0:0] = [os.path.basename(cpath)] - mi.cover = None - mi.cover_data = (None, None) + if title_page is not None: + spine = [title_page] + spine + mi.cover = None + mi.cover_data = (None, None) mi = create_metadata(tdir, mi, spine, resources) @@ -200,8 +239,6 @@ def convert(htmlfile, opts, notification=None): opf_path = os.path.join(tdir, 'metadata.opf') with open(opf_path, 'wb') as f: mi.render(f, buf, 'toc.ncx') - if opts.show_opf: - print open(os.path.join(tdir, 'metadata.opf')).read() toc = buf.getvalue() if toc: with open(os.path.join(tdir, 'toc.ncx'), 'wb') as f: @@ -209,9 +246,18 @@ def convert(htmlfile, opts, notification=None): if opts.show_ncx: print toc split(opf_path, opts) + opf = OPF(opf_path, tdir) + opf.remove_guide() + if has_title_page: + opf.create_guide_element() + opf.add_guide_item('cover', 'Cover', 'content/'+spine[0]) + with open(opf_path, 'wb') as f: + f.write(opf.render()) epub = initialize_container(opts.output) epub.add_dir(tdir) - print 'Output written to', opts.output + if opts.show_opf: + print open(os.path.join(tdir, 'metadata.opf')).read() + logger.info('Output written to %s'%opts.output) if opts.extract_to is not None: epub.extractall(opts.extract_to) diff --git a/src/calibre/ebooks/metadata/opf2.py b/src/calibre/ebooks/metadata/opf2.py index c49f89d506..18bcdde45f 100644 --- a/src/calibre/ebooks/metadata/opf2.py +++ b/src/calibre/ebooks/metadata/opf2.py @@ -530,6 +530,27 @@ class OPF(object): i = spine.index(x) spine[i:i+1] = items + def create_guide_element(self): + e = etree.SubElement(self.root, '{%s}guide'%self.NAMESPACES['opf']) + e.text = '\n ' + e.tail = '\n' + return e + + def remove_guide(self): + self.guide = None + for g in self.root.xpath('./*[re:match(name(), "guide", "i")]', namespaces={'re':'http://exslt.org/regular-expressions'}): + self.root.remove(g) + + def create_guide_item(self, type, title, href): + e = etree.Element('{%s}reference'%self.NAMESPACES['opf'], + type=type, title=title, href=href) + e.tail='\n' + return e + + def add_guide_item(self, type, title, href): + g = self.root.xpath('./*[re:match(name(), "guide", "i")]', namespaces={'re':'http://exslt.org/regular-expressions'})[0] + g.append(self.create_guide_item(type, title, href)) + def iterguide(self): return self.guide_path(self.root) @@ -628,6 +649,7 @@ class OPF(object): matches[0].text = unicode(val) return property(fget=fget, fset=fset) + @apply def cover(): @@ -641,8 +663,12 @@ class OPF(object): def fset(self, path): if self.guide is not None: self.guide.set_cover(path) + for item in list(self.iterguide()): + if 'cover' in item.get('type', ''): + item.getparent().remove(item) + else: - g = etree.SubElement(self.root, 'opf:guide', nsmap=self.NAMESPACES) + g = self.create_guide_element() self.guide = Guide() self.guide.set_cover(path) etree.SubElement(g, 'opf:reference', nsmap=self.NAMESPACES, diff --git a/src/calibre/ptempfile.py b/src/calibre/ptempfile.py index ae9fa26cb7..8a1cac4a54 100644 --- a/src/calibre/ptempfile.py +++ b/src/calibre/ptempfile.py @@ -72,6 +72,6 @@ class TemporaryDirectory(object): return self.tdir def __exit__(self, *args): - if not self.keep: - shutil.rmtree(self.tdir) - + if not self.keep and os.path.exists(self.tdir): + shutil.rmtree(self.tdir, ignore_errors=True) +