EPUB Input: When setting the cover for a book that identifies its cover image, but not the html wrapper around the cover, try to detect and remove that wrapper automatically. Fixes #999959 (Private bug)

This commit is contained in:
Kovid Goyal 2012-05-16 11:59:58 +05:30
parent 795ebbb573
commit 5a890aa399

View File

@ -6,7 +6,7 @@ __license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import os import os, re
from calibre.utils.date import isoformat, now from calibre.utils.date import isoformat, now
from calibre import guess_type from calibre import guess_type
@ -141,7 +141,7 @@ class MergeMetadata(object):
item = self.oeb.manifest.hrefs[old_cover.href] item = self.oeb.manifest.hrefs[old_cover.href]
if not cdata: if not cdata:
return item.id return item.id
self.oeb.manifest.remove(item) self.remove_old_cover(item)
elif not cdata: elif not cdata:
id = self.oeb.manifest.generate(id='cover') id = self.oeb.manifest.generate(id='cover')
self.oeb.manifest.add(id, old_cover.href, 'image/jpeg') self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
@ -152,3 +152,41 @@ class MergeMetadata(object):
self.oeb.guide.add('cover', 'Cover', href) self.oeb.guide.add('cover', 'Cover', href)
return id return id
def remove_old_cover(self, cover_item):
    '''
    Remove the old cover image from the book, together with any HTML page
    that existed only to display it.

    The image is dropped from the manifest, every ``<img>`` in a spine item
    that resolves to the image's href is deleted, and any spine item that is
    left with no text and no images afterwards is removed from both the
    spine and the manifest.

    :param cover_item: The manifest item of the cover image to remove. Its
                       ``href`` is compared against the resolved ``src`` of
                       every image in the spine.
    '''
    from calibre.ebooks.oeb.base import XPath
    from lxml import etree

    self.oeb.manifest.remove(cover_item)

    # Remove any references to the cover in the HTML
    find_images = XPath('//h:img[@src]')
    affected_items = set()
    for item in self.oeb.spine:
        try:
            images = find_images(item.data)
        except Exception:
            # Best effort: a spine item whose data cannot be queried is
            # left untouched
            images = []
        removed = False
        for img in images:
            if item.abshref(img.get('src')) != cover_item.href:
                continue
            parent = img.getparent()
            # lxml discards the tail text of an element when the element is
            # removed, so re-attach it to whatever precedes the image first
            if img.tail:
                previous = img.getprevious()
                if previous is not None:
                    previous.tail = (previous.tail or '') + img.tail
                else:
                    parent.text = (parent.text or '') + img.tail
            parent.remove(img)
            removed = True
        if removed:
            affected_items.add(item)

    # Check if the resulting HTML has no content, if so remove it
    for item in affected_items:
        body = XPath('//h:body')(item.data)
        if body:
            text = etree.tostring(body[0], method='text', encoding=unicode)
        else:
            text = ''
        text = re.sub(r'\s+', '', text)
        # A page that still displays other images is real content, not just
        # a wrapper around the cover, so it must be kept
        if not text and not XPath('//h:img')(item.data):
            self.log('Removing %s as it is a wrapper around'
                    ' the cover image'%item.href)
            self.oeb.spine.remove(item)
            self.oeb.manifest.remove(item)