From 5a890aa39983b533c02b335f57cbeeb7e8baa3e4 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Wed, 16 May 2012 11:59:58 +0530 Subject: [PATCH] EPUB Input: When setting the cover for a book that identifies its cover image, but not the html wrapper around the cover, try to detect and remove that wrapper automatically. Fixes #999959 (Private bug) --- src/calibre/ebooks/oeb/transforms/metadata.py | 42 ++++++++++++++++++- 1 file changed, 40 insertions(+), 2 deletions(-) diff --git a/src/calibre/ebooks/oeb/transforms/metadata.py b/src/calibre/ebooks/oeb/transforms/metadata.py index 41d5421dde..99296ff6a3 100644 --- a/src/calibre/ebooks/oeb/transforms/metadata.py +++ b/src/calibre/ebooks/oeb/transforms/metadata.py @@ -6,7 +6,7 @@ __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import os +import os, re from calibre.utils.date import isoformat, now from calibre import guess_type @@ -141,7 +141,7 @@ class MergeMetadata(object): item = self.oeb.manifest.hrefs[old_cover.href] if not cdata: return item.id - self.oeb.manifest.remove(item) + self.remove_old_cover(item) elif not cdata: id = self.oeb.manifest.generate(id='cover') self.oeb.manifest.add(id, old_cover.href, 'image/jpeg') @@ -152,3 +152,41 @@ class MergeMetadata(object): self.oeb.guide.add('cover', 'Cover', href) return id + def remove_old_cover(self, cover_item): + from calibre.ebooks.oeb.base import XPath + from lxml import etree + + self.oeb.manifest.remove(cover_item) + + # Remove any references to the cover in the HTML + affected_items = set() + for item in self.oeb.spine: + try: + images = XPath('//h:img[@src]')(item.data) + except: + images = [] + removed = False + for img in images: + href = item.abshref(img.get('src')) + if href == cover_item.href: + img.getparent().remove(img) + removed = True + if removed: + affected_items.add(item) + + # Check if the resulting HTML has no content, if so remove it + for item in affected_items: + body = XPath('//h:body')(item.data) + if body: + text = etree.tostring(body[0], method='text', encoding=unicode) + else: + text = '' + text = re.sub(r'\s+', '', text) + if not text: + self.log('Removing %s as it is a wrapper around' + ' the cover image'%item.href) + self.oeb.spine.remove(item) + self.oeb.manifest.remove(item) + + +