From b128849723b08d2d75ac655b6a32152ee04c8322 Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Sun, 1 Jul 2012 10:03:26 +0530
Subject: [PATCH] Conversion pipeline: When removing the first image, also remove the html file the image is found in, if that file has no other content

---
 src/calibre/ebooks/oeb/transforms/jacket.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/calibre/ebooks/oeb/transforms/jacket.py b/src/calibre/ebooks/oeb/transforms/jacket.py
index 5947087535..8fcddc7080 100644
--- a/src/calibre/ebooks/oeb/transforms/jacket.py
+++ b/src/calibre/ebooks/oeb/transforms/jacket.py
@@ -13,7 +13,7 @@ from lxml import etree
 
 from calibre import guess_type, strftime
 from calibre.ebooks.BeautifulSoup import BeautifulSoup
-from calibre.ebooks.oeb.base import XPath, XHTML_NS, XHTML
+from calibre.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urldefrag
 from calibre.library.comments import comments_to_html
 from calibre.utils.date import is_date_undefined
 from calibre.ebooks.chardet import strip_encoding_declarations
@@ -41,11 +41,25 @@ class Jacket(object):
         return removed
 
     def remove_first_image(self):
+        deleted_item = None
         for item in self.oeb.spine:
             removed = self.remove_images(item)
             if removed > 0:
                 self.log('Removed first image')
+                body = XPath('//h:body')(item.data)
+                if body:
+                    raw = xml2text(body[0]).strip()
+                    imgs = XPath('//h:img|//svg:svg')(item.data)
+                    if not raw and not imgs:
+                        self.log('Removing %s as it has no content'%item.href)
+                        self.oeb.manifest.remove(item)
+                        deleted_item = item
                 break
+        if deleted_item is not None:
+            for item in list(self.oeb.toc):
+                href = urldefrag(item.href)[0]
+                if href == deleted_item.href:
+                    self.oeb.toc.remove(item)
 
     def insert_metadata(self, mi):
         self.log('Inserting metadata into book...')