EPUB Input: Handle EPUBs with duplicate entries in the manifest. Fixes #925831 (failed to convert epub (without drm) into mobi)

This commit is contained in:
Kovid Goyal 2012-02-03 09:43:39 +05:30
parent b8d0e1a6b0
commit c1e4072ef6
2 changed files with 22 additions and 0 deletions

View File

@ -1077,6 +1077,12 @@ class Manifest(object):
if item in self.oeb.spine:
self.oeb.spine.remove(item)
def remove_duplicate_item(self, item):
    """Remove a redundant entry from the manifest's id index and item list.

    :param item: either a manifest item (an object with an ``id``
        attribute) or a key of ``self.ids`` that identifies one.

    Only ``self.ids`` and ``self.items`` are modified by this method.
    Raises ``KeyError`` if the item's id is not registered in ``self.ids``
    and ``ValueError`` if the item is not present in ``self.items``.
    """
    # Callers may hand over an id instead of the item object; resolve it.
    target = self.ids[item] if item in self.ids else item
    del self.ids[target.id]
    self.items.remove(target)
def generate(self, id=None, href=None):
"""Generate a new unique identifier and/or internal path for use in
creating a new manifest item, using the provided :param:`id` and/or

View File

@ -627,11 +627,27 @@ class OEBReader(object):
return
self.oeb.metadata.add('cover', cover.id)
def _manifest_remove_duplicates(self):
seen = set()
dups = set()
for item in self.oeb.manifest:
if item.href in seen:
dups.add(item.href)
seen.add(item.href)
for href in dups:
items = [x for x in self.oeb.manifest if x.href == href]
for x in items:
if x not in self.oeb.spine:
self.oeb.log.warn('Removing duplicate manifest item with id:', x.id)
self.oeb.manifest.remove_duplicate_item(x)
def _all_from_opf(self, opf):
self.oeb.version = opf.get('version', '1.2')
self._metadata_from_opf(opf)
self._manifest_from_opf(opf)
self._spine_from_opf(opf)
self._manifest_remove_duplicates()
self._guide_from_opf(opf)
item = self._find_ncx(opf)
self._toc_from_opf(opf, item)