EPUB Input: When converting a file that has entries in the manifest that do not exist, remove them, instead of aborting the conversion. Fixes #910933 (Failing to convert ePub to any format)

This commit is contained in:
Kovid Goyal 2012-01-03 08:37:28 +05:30
parent c58a9c062f
commit fe31565283

View File

@ -175,13 +175,27 @@ class OEBReader(object):
manifest = self.oeb.manifest manifest = self.oeb.manifest
known = set(manifest.hrefs) known = set(manifest.hrefs)
unchecked = set(manifest.values()) unchecked = set(manifest.values())
cdoc = OEB_DOCS|OEB_STYLES
invalid = set()
while unchecked: while unchecked:
new = set() new = set()
for item in unchecked: for item in unchecked:
data = None
if (item.media_type in cdoc or
item.media_type[-4:] in ('/xml', '+xml')):
try:
data = item.data
except:
self.oeb.log.exception(u'Failed to read from manifest '
u'entry with id: %s, ignoring'%item.id)
invalid.add(item)
continue
if data is None:
continue
if (item.media_type in OEB_DOCS or if (item.media_type in OEB_DOCS or
item.media_type[-4:] in ('/xml', '+xml')) and \ item.media_type[-4:] in ('/xml', '+xml')):
item.data is not None: hrefs = [r[2] for r in iterlinks(data)]
hrefs = [r[2] for r in iterlinks(item.data)]
for href in hrefs: for href in hrefs:
href, _ = urldefrag(href) href, _ = urldefrag(href)
if not href: if not href:
@ -197,7 +211,7 @@ class OEBReader(object):
new.add(href) new.add(href)
elif item.media_type in OEB_STYLES: elif item.media_type in OEB_STYLES:
try: try:
urls = list(cssutils.getUrls(item.data)) urls = list(cssutils.getUrls(data))
except: except:
urls = [] urls = []
for url in urls: for url in urls:
@ -231,6 +245,9 @@ class OEBReader(object):
added = manifest.add(id, href, media_type) added = manifest.add(id, href, media_type)
unchecked.add(added) unchecked.add(added)
for item in invalid:
self.oeb.manifest.remove(item)
def _manifest_from_opf(self, opf): def _manifest_from_opf(self, opf):
manifest = self.oeb.manifest manifest = self.oeb.manifest
for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'): for elem in xpath(opf, '/o2:package/o2:manifest/o2:item'):