mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Input: Handle DOCX files whose index fields have their field names incorrectly lowercased. Fixes #1318670 [Conversion from DOCX, probably indexitem related](https://bugs.launchpad.net/calibre/+bug/1318670)
This commit is contained in:
parent
fa9b43f7f1
commit
97b222caca
@ -126,10 +126,13 @@ class Fields(object):
|
|||||||
|
|
||||||
field_types = ('hyperlink', 'xe', 'index', 'ref', 'noteref')
|
field_types = ('hyperlink', 'xe', 'index', 'ref', 'noteref')
|
||||||
parsers = {x.upper():getattr(self, 'parse_'+x) for x in field_types}
|
parsers = {x.upper():getattr(self, 'parse_'+x) for x in field_types}
|
||||||
|
parsers.update({x:getattr(self, 'parse_'+x) for x in field_types})
|
||||||
field_parsers = {f.upper():globals()['parse_%s' % f] for f in field_types}
|
field_parsers = {f.upper():globals()['parse_%s' % f] for f in field_types}
|
||||||
|
field_parsers.update({f:globals()['parse_%s' % f] for f in field_types})
|
||||||
|
|
||||||
for f in field_types:
|
for f in field_types:
|
||||||
setattr(self, '%s_fields' % f, [])
|
setattr(self, '%s_fields' % f, [])
|
||||||
|
unknown_fields = {'TOC', 'toc', 'PAGEREF', 'pageref'} # The TOC and PAGEREF fields are handled separately
|
||||||
|
|
||||||
for field in self.fields:
|
for field in self.fields:
|
||||||
field.finalize()
|
field.finalize()
|
||||||
@ -137,6 +140,9 @@ class Fields(object):
|
|||||||
func = parsers.get(field.name, None)
|
func = parsers.get(field.name, None)
|
||||||
if func is not None:
|
if func is not None:
|
||||||
func(field, field_parsers[field.name], log)
|
func(field, field_parsers[field.name], log)
|
||||||
|
elif field.name not in unknown_fields:
|
||||||
|
log.warn('Encountered unknown field: %s, ignoring it.' % field.name)
|
||||||
|
unknown_fields.add(field.name)
|
||||||
|
|
||||||
def get_runs(self, field):
|
def get_runs(self, field):
|
||||||
all_runs = []
|
all_runs = []
|
||||||
@ -200,6 +206,8 @@ class Fields(object):
|
|||||||
return
|
return
|
||||||
idx = parse_func(field.instructions, log)
|
idx = parse_func(field.instructions, log)
|
||||||
hyperlinks, blocks = process_index(field, idx, self.xe_fields, log)
|
hyperlinks, blocks = process_index(field, idx, self.xe_fields, log)
|
||||||
|
if not blocks:
|
||||||
|
return
|
||||||
for anchor, run in hyperlinks:
|
for anchor, run in hyperlinks:
|
||||||
self.hyperlink_fields.append(({'anchor':anchor}, [run]))
|
self.hyperlink_fields.append(({'anchor':anchor}, [run]))
|
||||||
|
|
||||||
|
@ -91,7 +91,7 @@ def process_index(field, index, xe_fields, log):
|
|||||||
|
|
||||||
xe_fields = get_applicable_xe_fields(index, xe_fields)
|
xe_fields = get_applicable_xe_fields(index, xe_fields)
|
||||||
if not xe_fields:
|
if not xe_fields:
|
||||||
return
|
return [], []
|
||||||
if heading_text is not None:
|
if heading_text is not None:
|
||||||
groups = partition_by_first_letter(xe_fields, key=itemgetter('text'))
|
groups = partition_by_first_letter(xe_fields, key=itemgetter('text'))
|
||||||
items = []
|
items = []
|
||||||
@ -138,7 +138,7 @@ def split_up_block(block, a, text, parts, ldict):
|
|||||||
"""
|
"""
|
||||||
The merge algorithm is a little tricky.
|
The merge algorithm is a little tricky.
|
||||||
We start with a list of elementary blocks. Each is an HtmlElement, a p node
|
We start with a list of elementary blocks. Each is an HtmlElement, a p node
|
||||||
with a list of child nodes. The last child is a link, and the earlier ones are
|
with a list of child nodes. The last child is a link, and the earlier ones are
|
||||||
just text.
|
just text.
|
||||||
The list is in reverse order from what we want in the index.
|
The list is in reverse order from what we want in the index.
|
||||||
There is a dictionary ldict which records the level of each child node.
|
There is a dictionary ldict which records the level of each child node.
|
||||||
@ -159,7 +159,7 @@ If there are no more levels in n, then add the link from nk to the links for pk.
|
|||||||
This might be the first link for pk, or we might get a list of references.
|
This might be the first link for pk, or we might get a list of references.
|
||||||
|
|
||||||
Otherwise nk+1 is the next level in n. Look for a matching entry in p. It must have
|
Otherwise nk+1 is the next level in n. Look for a matching entry in p. It must have
|
||||||
the same text, it must follow pk, it must come before we find any other p entries at
|
the same text, it must follow pk, it must come before we find any other p entries at
|
||||||
the same level as pk, and it must have the same level as nk+1.
|
the same level as pk, and it must have the same level as nk+1.
|
||||||
|
|
||||||
If we find such a matching entry, go back to the start with (p ... pk+1) and (n ... nk+1).
|
If we find such a matching entry, go back to the start with (p ... pk+1) and (n ... nk+1).
|
||||||
@ -208,7 +208,7 @@ def merge_blocks(prev_block, next_block, pind, nind, next_path, ldict):
|
|||||||
if prevent > 0:
|
if prevent > 0:
|
||||||
merge_blocks(prev_block, next_block, prevent, nind, next_path, ldict)
|
merge_blocks(prev_block, next_block, prevent, nind, next_path, ldict)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Want to insert elements into previous block
|
# Want to insert elements into previous block
|
||||||
while nind < len(next_block):
|
while nind < len(next_block):
|
||||||
# insert takes it out of old
|
# insert takes it out of old
|
||||||
|
Loading…
x
Reference in New Issue
Block a user