diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index fbcec7861c..6d3f5cde85 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -328,6 +328,7 @@ class HTMLInput(InputFormatPlugin): filelist = get_filelist(htmlpath, basedir, opts, log) + filelist = [f for f in filelist if not f.is_binary] htmlfile_map = {} for f in filelist: path = f.path @@ -336,6 +337,7 @@ class HTMLInput(InputFormatPlugin): id, href = oeb.manifest.generate(id='html', href=bname) htmlfile_map[path] = href item = oeb.manifest.add(id, href, 'text/html') + item.html_input_href = bname oeb.spine.add(item, True) self.added_resources = {} @@ -409,8 +411,9 @@ class HTMLInput(InputFormatPlugin): if not islinux: link = link.lower() if link not in self.added_resources: + bhref = os.path.basename(link) id, href = self.oeb.manifest.generate(id='added', - href=os.path.basename(link)) + href=bhref) self.oeb.log.debug('Added', link) self.oeb.container = self.DirContainer(os.path.dirname(link), self.oeb.log) @@ -418,7 +421,9 @@ class HTMLInput(InputFormatPlugin): guessed = self.guess_type(href)[0] media_type = guessed or self.BINARY_MIME - self.oeb.manifest.add(id, href, media_type).data + item = self.oeb.manifest.add(id, href, media_type) + item.html_input_href = bhref + item.data self.added_resources[link] = href nlink = self.added_resources[link] diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py index 75a1ffb04d..215e5a65ce 100644 --- a/src/calibre/ebooks/oeb/base.py +++ b/src/calibre/ebooks/oeb/base.py @@ -793,7 +793,7 @@ class Manifest(object): self.oeb.log.warn('File %r does not appear to be (X)HTML'%self.href) nroot = etree.fromstring('') for child in list(data): - child.getparent.remove(child) + child.getparent().remove(child) nroot.append(child) data = nroot elif not namespace(data.tag): @@ -927,7 +927,8 @@ class Manifest(object): if data is None: if self._loader is None: return None - data = self._loader(self.href) + data = self._loader(getattr(self, 'html_input_href', + self.href)) if not isinstance(data, basestring): pass # already parsed elif self.media_type.lower() in OEB_DOCS: