diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 36c46a81c0..cd60e4fe25 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -1435,6 +1435,11 @@ class Guide(object): def remove(self, type): return self.refs.pop(type, None) + def remove_by_href(self, href): + remove = [r for r, i in self.refs.iteritems() if i.href == href] + for r in remove: + self.remove(r) + def iterkeys(self): for type in self.refs: yield type diff --git a/src/calibre/ebooks/oeb/transforms/metadata.py b/src/calibre/ebooks/oeb/transforms/metadata.py index dbbdeb3c52..640da6685e 100644 --- a/src/calibre/ebooks/oeb/transforms/metadata.py +++ b/src/calibre/ebooks/oeb/transforms/metadata.py @@ -144,44 +144,59 @@ class MergeMetadata(object): # The amazon formats dont support html cover pages, so remove them # even if no cover was specified. self.oeb.guide.remove('titlepage') + do_remove_old_cover = False if old_cover is not None: if old_cover.href in self.oeb.manifest.hrefs: item = self.oeb.manifest.hrefs[old_cover.href] if not cdata: return item.id - self.remove_old_cover(item) + do_remove_old_cover = True elif not cdata: id = self.oeb.manifest.generate(id='cover')[0] self.oeb.manifest.add(id, old_cover.href, 'image/jpeg') return id + new_cover_item = None if cdata: id, href = self.oeb.manifest.generate('cover', 'cover.'+ext) - self.oeb.manifest.add(id, href, guess_type('cover.'+ext)[0], data=cdata) + new_cover_item = self.oeb.manifest.add(id, href, guess_type('cover.'+ext)[0], data=cdata) self.oeb.guide.add('cover', 'Cover', href) + if do_remove_old_cover: + self.remove_old_cover(item, new_cover_item.href) return id - def remove_old_cover(self, cover_item): - from calibre.ebooks.oeb.base import XPath + def remove_old_cover(self, cover_item, new_cover_href=None): + from calibre.ebooks.oeb.base import XPath, XLINK from lxml import etree self.oeb.manifest.remove(cover_item) # Remove any references to the cover in the HTML affected_items = set() - for item in self.oeb.spine: + xp = XPath('//h:img[@src]|//svg:image[@xl:href]') + for i, item in enumerate(self.oeb.spine): try: - images = XPath('//h:img[@src]')(item.data) + images = xp(item.data) except Exception: images = () removed = False for img in images: + href = img.get('src') or img.get(XLINK('href')) try: - href = item.abshref(img.get('src')) + href = item.abshref(href) except Exception: continue # Invalid URL, ignore if href == cover_item.href: - img.getparent().remove(img) - removed = True + if new_cover_href is not None: + replacement_href = item.relhref(new_cover_href) + attr = 'src' if img.tag.endswith('img') else XLINK('href') + img.set(attr, replacement_href) + else: + p = img.getparent() + if p.tag.endswith('}svg'): + p.getparent().remove(p) + else: + p.remove(img) + removed = True if removed: affected_items.add(item) @@ -198,3 +213,4 @@ class MergeMetadata(object): ' the cover image'%item.href) self.oeb.spine.remove(item) self.oeb.manifest.remove(item) + self.oeb.guide.remove_by_href(item.href)