A nicer merge algorithm

This commit is contained in:
Kovid Goyal 2014-03-31 21:58:13 +05:30
parent 2133441fe8
commit 39e03a4732

View File

@ -119,11 +119,8 @@ def process_index(field, index, xe_fields, log):
return hyperlinks, blocks return hyperlinks, blocks
def split_up_block(block, a, text, prefix_map): def split_up_block(block, a, text, parts):
parts = filter(None, (x.strip() for x in text.split(':'))) prefix = parts[:-1]
if len(parts) < 2:
return
prefix_map[block] = prefix = parts[:-1]
a.text = parts[-1] a.text = parts[-1]
parent = a.getparent() parent = a.getparent()
style = 'display:block; margin-left: %.3gem' style = 'display:block; margin-left: %.3gem'
@ -136,23 +133,25 @@ def split_up_block(block, a, text, prefix_map):
parent.append(span) parent.append(span)
span.append(a) span.append(a)
def merge_blocks(prev_block, next_block, prev_path, next_path):
    """Merge the first link of ``next_block`` into ``prev_block``.

    The last ``<a>`` descendant of ``prev_block`` and the first ``<a>``
    descendant of ``next_block`` are located. If either block contains no
    ``<a>`` element, nothing is changed and ``next_block`` is left in place.
    Otherwise ``next_block`` is removed from its parent after the merge.

    :param prev_block: Element that receives the merged content.
    :param next_block: Element whose first link is moved; removed afterwards.
    :param prev_path: Index path (list of text parts) of ``prev_block``.
    :param next_path: Index path (list of text parts) of ``next_block``.
    """
    pa, na = prev_block.xpath('descendant::a'), next_block.xpath('descendant::a[1]')
    if not pa or not na:
        return
    pa, na = pa[-1], na[0]
    if prev_path == next_path:
        # Identical paths: put the link on the same line, separated by a comma
        pa.tail = ', '
        p = pa.getparent()
        p.insert(p.index(pa) + 1, na)
    else:
        # Different paths: add a line to the previous block by moving the
        # element that wraps the next link (presumably the <span> created by
        # split_up_block -- confirm against callers) right after the element
        # that wraps the previous link.
        ps, ns = pa.getparent(), na.getparent()
        p = ps.getparent()
        p.insert(p.index(ps) + 1, ns)
    next_block.getparent().remove(next_block)
def polish_index_markup(index, blocks): def polish_index_markup(index, blocks):
text_map, prefix_map, a_map = {}, {}, {} path_map = {}
for block in blocks: for block in blocks:
cls = block.get('class', '') or '' cls = block.get('class', '') or ''
block.set('class', (cls + ' index-entry').lstrip()) block.set('class', (cls + ' index-entry').lstrip())
@ -160,15 +159,23 @@ def polish_index_markup(index, blocks):
text = '' text = ''
if a: if a:
text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode).strip() text = etree.tostring(a[0], method='text', with_tail=False, encoding=unicode).strip()
text_map[block] = text
if ':' in text: if ':' in text:
split_up_block(block, a[0], text, prefix_map) path_map[block] = parts = filter(None, (x.strip() for x in text.split(':')))
if len(parts) > 1:
split_up_block(block, a[0], text, parts)
else:
path_map[block] = [text]
prev_block = None prev_block = blocks[0]
for block in blocks: for block in blocks[1:]:
if text_map[block] == text_map.get(prev_block, None): pp, pn = path_map[prev_block], path_map[block]
merge_blocks(prev_block, block) if pp == pn:
if block in prefix_map and prefix_map[block] == prefix_map.get(prev_block, None): merge_blocks(prev_block, block, pp, pn)
merge_blocks(prev_block, block, prefix=True) elif len(pp) > 1 and len(pn) >= len(pp):
if pn[:-1] in (pp[:-1], pp):
merge_blocks(prev_block, block, pp, pn)
# It's possible to have pn starting with pp but having more
# than one extra entry, but until I see that in the wild, I'm not
# going to bother
prev_block = block prev_block = block