DOCX Input: Handle DOCX files with index fields that have their field names incorrectly lowercased. Fixes #1318670 [Conversion from DOCX, probably indexitem related](https://bugs.launchpad.net/calibre/+bug/1318670)

This commit is contained in:
Kovid Goyal 2014-05-12 20:50:34 +05:30
parent fa9b43f7f1
commit 97b222caca
2 changed files with 12 additions and 4 deletions

View File

@ -126,10 +126,13 @@ class Fields(object):
field_types = ('hyperlink', 'xe', 'index', 'ref', 'noteref') field_types = ('hyperlink', 'xe', 'index', 'ref', 'noteref')
parsers = {x.upper():getattr(self, 'parse_'+x) for x in field_types} parsers = {x.upper():getattr(self, 'parse_'+x) for x in field_types}
parsers.update({x:getattr(self, 'parse_'+x) for x in field_types})
field_parsers = {f.upper():globals()['parse_%s' % f] for f in field_types} field_parsers = {f.upper():globals()['parse_%s' % f] for f in field_types}
field_parsers.update({f:globals()['parse_%s' % f] for f in field_types})
for f in field_types: for f in field_types:
setattr(self, '%s_fields' % f, []) setattr(self, '%s_fields' % f, [])
unknown_fields = {'TOC', 'toc', 'PAGEREF', 'pageref'} # The TOC and PAGEREF fields are handled separately
for field in self.fields: for field in self.fields:
field.finalize() field.finalize()
@ -137,6 +140,9 @@ class Fields(object):
func = parsers.get(field.name, None) func = parsers.get(field.name, None)
if func is not None: if func is not None:
func(field, field_parsers[field.name], log) func(field, field_parsers[field.name], log)
elif field.name not in unknown_fields:
log.warn('Encountered unknown field: %s, ignoring it.' % field.name)
unknown_fields.add(field.name)
def get_runs(self, field): def get_runs(self, field):
all_runs = [] all_runs = []
@ -200,6 +206,8 @@ class Fields(object):
return return
idx = parse_func(field.instructions, log) idx = parse_func(field.instructions, log)
hyperlinks, blocks = process_index(field, idx, self.xe_fields, log) hyperlinks, blocks = process_index(field, idx, self.xe_fields, log)
if not blocks:
return
for anchor, run in hyperlinks: for anchor, run in hyperlinks:
self.hyperlink_fields.append(({'anchor':anchor}, [run])) self.hyperlink_fields.append(({'anchor':anchor}, [run]))

View File

@ -91,7 +91,7 @@ def process_index(field, index, xe_fields, log):
xe_fields = get_applicable_xe_fields(index, xe_fields) xe_fields = get_applicable_xe_fields(index, xe_fields)
if not xe_fields: if not xe_fields:
return return [], []
if heading_text is not None: if heading_text is not None:
groups = partition_by_first_letter(xe_fields, key=itemgetter('text')) groups = partition_by_first_letter(xe_fields, key=itemgetter('text'))
items = [] items = []
@ -138,7 +138,7 @@ def split_up_block(block, a, text, parts, ldict):
""" """
The merge algorithm is a little tricky. The merge algorithm is a little tricky.
We start with a list of elementary blocks. Each is an HtmlElement, a p node We start with a list of elementary blocks. Each is an HtmlElement, a p node
with a list of child nodes. The last child is a link, and the earlier ones are with a list of child nodes. The last child is a link, and the earlier ones are
just text. just text.
The list is in reverse order from what we want in the index. The list is in reverse order from what we want in the index.
There is a dictionary ldict which records the level of each child node. There is a dictionary ldict which records the level of each child node.
@ -159,7 +159,7 @@ If there are no more levels in n, then add the link from nk to the links for pk.
This might be the first link for pk, or we might get a list of references. This might be the first link for pk, or we might get a list of references.
Otherwise nk+1 is the next level in n. Look for a matching entry in p. It must have Otherwise nk+1 is the next level in n. Look for a matching entry in p. It must have
the same text, it must follow pk, it must come before we find any other p entries at the same text, it must follow pk, it must come before we find any other p entries at
the same level as pk, and it must have the same level as nk+1. the same level as pk, and it must have the same level as nk+1.
If we find such a matching entry, go back to the start with (p ... pk+1) and (n ... nk+1). If we find such a matching entry, go back to the start with (p ... pk+1) and (n ... nk+1).
@ -208,7 +208,7 @@ def merge_blocks(prev_block, next_block, pind, nind, next_path, ldict):
if prevent > 0: if prevent > 0:
merge_blocks(prev_block, next_block, prevent, nind, next_path, ldict) merge_blocks(prev_block, next_block, prevent, nind, next_path, ldict)
return return
# Want to insert elements into previous block # Want to insert elements into previous block
while nind < len(next_block): while nind < len(next_block):
# insert takes it out of old # insert takes it out of old