- Fail gracefully on input with fragment-level broken links
This commit is contained in:
Marshall T. Vandegrift 2009-01-19 21:17:27 -05:00
parent 0f56822a07
commit d321b77930

View File

@ -95,6 +95,7 @@ class Serializer(object):
def __init__(self, oeb, images): def __init__(self, oeb, images):
self.oeb = oeb self.oeb = oeb
self.images = images self.images = images
self.logger = oeb.logger
self.id_offsets = {} self.id_offsets = {}
self.href_offsets = defaultdict(list) self.href_offsets = defaultdict(list)
self.breaks = [] self.breaks = []
@ -144,8 +145,8 @@ class Serializer(object):
item = hrefs[path] if path else None item = hrefs[path] if path else None
if item and item.spine_position is None: if item and item.spine_position is None:
return False return False
id = item.id if item else base.id path = item.href if item else base.href
href = '#'.join((id, frag)) if frag else id href = '#'.join((path, frag)) if frag else path
buffer.write('filepos=') buffer.write('filepos=')
self.href_offsets[href].append(buffer.tell()) self.href_offsets[href].append(buffer.tell())
buffer.write('0000000000') buffer.write('0000000000')
@ -170,7 +171,7 @@ class Serializer(object):
buffer = self.buffer buffer = self.buffer
if not item.linear: if not item.linear:
self.breaks.append(buffer.tell() - 1) self.breaks.append(buffer.tell() - 1)
self.id_offsets[item.id] = buffer.tell() self.id_offsets[item.href] = buffer.tell()
for elem in item.data.find(XHTML('body')): for elem in item.data.find(XHTML('body')):
self.serialize_elem(elem, item) self.serialize_elem(elem, item)
buffer.write('<mbp:pagebreak/>') buffer.write('<mbp:pagebreak/>')
@ -183,8 +184,8 @@ class Serializer(object):
tag = prefixname(elem.tag, nsrmap) tag = prefixname(elem.tag, nsrmap)
for attr in ('name', 'id'): for attr in ('name', 'id'):
if attr in elem.attrib: if attr in elem.attrib:
id = '#'.join((item.id, elem.attrib[attr])) href = '#'.join((item.href, elem.attrib[attr]))
self.id_offsets[id] = buffer.tell() self.id_offsets[href] = buffer.tell()
del elem.attrib[attr] del elem.attrib[attr]
if tag == 'a' and not elem.attrib \ if tag == 'a' and not elem.attrib \
and not len(elem) and not elem.text: and not len(elem) and not elem.text:
@ -232,8 +233,12 @@ class Serializer(object):
def fixup_links(self): def fixup_links(self):
buffer = self.buffer buffer = self.buffer
for id, hoffs in self.href_offsets.items(): id_offsets = self.id_offsets
ioff = self.id_offsets[id] for href, hoffs in self.href_offsets.items():
if href not in id_offsets:
self.logger.warn('Hyperlink target %r not found' % href)
href, _ = urldefrag(href)
ioff = self.id_offsets[href]
for hoff in hoffs: for hoff in hoffs:
buffer.seek(hoff) buffer.seek(hoff)
buffer.write('%010d' % ioff) buffer.write('%010d' % ioff)