From c1e4072ef6a304915c78285240654691ef6a9cda Mon Sep 17 00:00:00 2001
From: Kovid Goyal
Date: Fri, 3 Feb 2012 09:43:39 +0530
Subject: [PATCH] EPUB Input: Handle EPUBs with duplicate entries in the manifest. Fixes #925831 (failed to convert epub (without drm) into mobi)

---
 src/calibre/ebooks/oeb/base.py   |  6 ++++++
 src/calibre/ebooks/oeb/reader.py | 16 ++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/src/calibre/ebooks/oeb/base.py b/src/calibre/ebooks/oeb/base.py
index b902eb9701..2778f7fc8a 100644
--- a/src/calibre/ebooks/oeb/base.py
+++ b/src/calibre/ebooks/oeb/base.py
@@ -1077,6 +1077,12 @@ class Manifest(object):
         if item in self.oeb.spine:
             self.oeb.spine.remove(item)
 
+    def remove_duplicate_item(self, item):
+        if item in self.ids:
+            item = self.ids[item]
+        del self.ids[item.id]
+        self.items.remove(item)
+
     def generate(self, id=None, href=None):
         """Generate a new unique identifier and/or internal path for use in
         creating a new manifest item, using the provided :param:`id` and/or
diff --git a/src/calibre/ebooks/oeb/reader.py b/src/calibre/ebooks/oeb/reader.py
index 3d44589e6d..a458df5a83 100644
--- a/src/calibre/ebooks/oeb/reader.py
+++ b/src/calibre/ebooks/oeb/reader.py
@@ -627,11 +627,27 @@ class OEBReader(object):
             return
         self.oeb.metadata.add('cover', cover.id)
 
+    def _manifest_remove_duplicates(self):
+        seen = set()
+        dups = set()
+        for item in self.oeb.manifest:
+            if item.href in seen:
+                dups.add(item.href)
+            seen.add(item.href)
+
+        for href in dups:
+            items = [x for x in self.oeb.manifest if x.href == href]
+            for x in items:
+                if x not in self.oeb.spine:
+                    self.oeb.log.warn('Removing duplicate manifest item with id:', x.id)
+                    self.oeb.manifest.remove_duplicate_item(x)
+
     def _all_from_opf(self, opf):
         self.oeb.version = opf.get('version', '1.2')
         self._metadata_from_opf(opf)
         self._manifest_from_opf(opf)
         self._spine_from_opf(opf)
+        self._manifest_remove_duplicates()
         self._guide_from_opf(opf)
         item = self._find_ncx(opf)
         self._toc_from_opf(opf, item)