diff --git a/src/calibre/ebooks/docx/fields.py b/src/calibre/ebooks/docx/fields.py
index 24be0cad86..90e80423ff 100644
--- a/src/calibre/ebooks/docx/fields.py
+++ b/src/calibre/ebooks/docx/fields.py
@@ -37,7 +37,9 @@ scanner = re.Scanner([
null = object()
-def parser(name, field_map, default_field_name):
+def parser(name, field_map, default_field_name=None):
+ # Factory: builds and returns a parse(raw, log=None) function for one
+ # field type.
+ # field_map is a string of whitespace-separated 'switch:key' pairs (for
+ # example 'l:anchor o:title'); it is converted to a dict below, so each
+ # pair must contain exactly one ':'.
+ # default_field_name is optional and is None when omitted; how parse()
+ # uses it is in the part of the body not shown in this hunk.
+
+ field_map = dict((x.split(':') for x in field_map.split()))
def parse(raw, log=None):
ans = {}
@@ -63,10 +65,15 @@ def parser(name, field_map, default_field_name):
return parse
+# Each field map below is a string of space-separated 'switch:key' pairs.
+# The third argument, when given, is passed to parser() as default_field_name.
parse_hyperlink = parser('hyperlink',
- {'l':'anchor', 'm':'image-map', 'n':'target', 'o':'title', 't':'target'}, 'url')
+ 'l:anchor m:image-map n:target o:title t:target', 'url')
parse_xe = parser('xe',
- {'b':'bold', 'i':'italic', 'f':'entry_type', 'r':'page_range_bookmark', 't':'page_number_text', 'y':'yomi'}, 'text')
+ # NOTE(review): the result keys change from underscores to hyphens here
+ # ('entry_type' -> 'entry-type', etc.) -- confirm no consumer still reads
+ # the old spellings.
+ 'b:bold i:italic f:entry-type r:page-range-bookmark t:page-number-text y:yomi', 'text')
+
+# The INDEX parser is created without a default_field_name, so parser()
+# receives None for it.
+parse_index = parser('index',
+ 'b:bookmark c:columns-per-page d:sequence-separator e:first-page-number-separator'
+ ' f:entry-type g:page-range-separator h:heading k:crossref-separator'
+ ' p:page-number-separator r:run-together y:yomi z:langcode')
class Fields(object):
@@ -94,38 +101,56 @@ class Fields(object):
if stack:
stack[-1].contents.append(elem)
- # Parse hyperlink fields
- self.hyperlink_fields = []
- for field in self.fields:
- if len(field.instructions) == 1 and field.instructions[0][0] == 'HYPERLINK':
- hl = parse_hyperlink(field.instructions[0][1], log)
- if hl:
- if 'target' in hl and hl['target'] is None:
- hl['target'] = '_blank'
- all_runs = []
- current_runs = []
- # We only handle spans in a single paragraph
- # being wrapped in
- for x in field.contents:
- if x.tag.endswith('}p'):
- if current_runs:
- all_runs.append(current_runs)
- current_runs = []
- elif x.tag.endswith('}r'):
- current_runs.append(x)
- if current_runs:
- all_runs.append(current_runs)
- for runs in all_runs:
- self.hyperlink_fields.append((hl, runs))
+ # Field types that have both a module-level parse_<type> function and a
+ # parse_<type> method on this object.
+ field_types = ('hyperlink', 'xe', 'index')
+ # Upper-cased field name -> bound handler method (self.parse_<type>).
+ parsers = {x.upper():getattr(self, 'parse_'+x) for x in field_types}
+ # Upper-cased field name -> module-level parse_<type> function. These share
+ # their names with the methods above, hence the lookup through globals().
+ field_parsers = {f.upper():globals()['parse_%s' % f] for f in field_types}
+
+ # Start every run with empty result lists: self.hyperlink_fields,
+ # self.xe_fields and self.index_fields.
+ for f in field_types:
+ setattr(self, '%s_fields' % f, [])
- # Parse XE fields
- self.xe_fields = []
for field in self.fields:
- if len(field.instructions) >= 1 and field.instructions[0][0] == 'HYPERLINK':
- xe = parse_xe(field.instructions[0][1], log) # TODO: Handle field with multiple instructions
- if xe:
- # TODO: parse the field contents
- self.xe_fields.append(xe)
+ # Dispatch on the name of the first instruction; fields without
+ # instructions, or with a name that has no registered handler, are skipped.
+ if field.instructions:
+ name = field.instructions[0][0]
+ func = parsers.get(name, None)
+ if func is not None:
+ # The handler method receives the matching module-level parse function.
+ func(field, field_parsers[name], log)
+
+    def parse_hyperlink(self, field, parse_func, log):
+        """Parse a HYPERLINK field and record the runs it covers.
+
+        Only fields with exactly one instruction are handled. The raw text of
+        that instruction is passed to ``parse_func``; a falsy result is
+        ignored. One ``(link, runs)`` tuple per group of runs is appended to
+        ``self.hyperlink_fields``, all tuples sharing the same ``link`` object.
+        """
+        instructions = field.instructions
+        if len(instructions) != 1:
+            return
+        link = parse_func(instructions[0][1], log)
+        if not link:
+            return
+        # A target that was parsed but has no value falls back to '_blank'
+        if 'target' in link and link['target'] is None:
+            link['target'] = '_blank'
+
+        # Runs are grouped per paragraph: a paragraph element closes the
+        # group collected so far, so no group ever spans two paragraphs.
+        groups = []
+        pending = []
+        for node in field.contents:
+            tag = node.tag
+            if tag.endswith('}r'):
+                pending.append(node)
+            elif tag.endswith('}p') and pending:
+                groups.append(pending)
+                pending = []
+        if pending:
+            groups.append(pending)
+
+        self.hyperlink_fields.extend((link, runs) for runs in groups)
+
+    def parse_xe(self, field, parse_func, log):
+        """Parse an XE field and store the result in ``self.xe_fields``.
+
+        The raw text of the first instruction is passed to ``parse_func``;
+        the parsed result is appended only when it is truthy. A field with
+        no instructions is ignored instead of raising ``IndexError``.
+        """
+        if not field.instructions:
+            # Nothing to parse; same kind of guard the sibling handlers use
+            return
+        # TODO: Handle field with multiple instructions
+        xe = parse_func(field.instructions[0][1], log)
+        if xe:
+            # TODO: parse the field contents
+            self.xe_fields.append(xe)
+
+    def parse_index(self, field, parse_func, log):
+        """Parse an INDEX field and store the result in ``self.index_fields``.
+
+        The raw text of the first instruction is passed to ``parse_func`` and
+        the result is appended unconditionally, so an empty result is kept.
+        Fields without instructions are ignored.
+        """
+        instructions = field.instructions
+        if not instructions:
+            return
+        parsed = parse_func(instructions[0][1], log)
+        # TODO: parse the field contents
+        self.index_fields.append(parsed)
def test_parse_fields():
import unittest
@@ -146,6 +171,11 @@ def test_parse_fields():
ae(r'name \b \i', {'text':'name', 'bold':None, 'italic':None})
ae(r'xxx \y a', {'text':'xxx', 'yomi':'a'})
+        def test_index(self):
+            # Raw INDEX instruction text paired with the dict parse_index
+            # should produce for it.
+            cases = (
+                (r'', {}),
+                (r'\b \c 1', {'bookmark':None, 'columns-per-page': '1'}),
+            )
+            for raw, expected in cases:
+                self.assertEqual(parse_index(raw, None), expected)
+
suite = unittest.TestLoader().loadTestsFromTestCase(TestParseFields)
unittest.TextTestRunner(verbosity=4).run(suite)