mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
DOCX Input: Fix handling of hyperlinks in documents where the hyperlink text is split up into multiple field components
This commit is contained in:
parent
2a9643793a
commit
b23cb51d74
@ -16,16 +16,21 @@ class Field(object):
|
|||||||
self.start = start
|
self.start = start
|
||||||
self.end = None
|
self.end = None
|
||||||
self.contents = []
|
self.contents = []
|
||||||
self.elements = []
|
self.buf = []
|
||||||
self.instructions = []
|
self.instructions = None
|
||||||
|
self.name = None
|
||||||
|
|
||||||
def add_instr(self, elem):
|
def add_instr(self, elem):
|
||||||
raw = elem.text
|
raw = elem.text
|
||||||
if not raw:
|
if not raw:
|
||||||
return
|
return
|
||||||
name, rest = raw.strip().partition(' ')[0::2]
|
if self.name is None:
|
||||||
self.instructions.append((name, rest.strip()))
|
self.name, raw = raw.strip().partition(' ')[0::2]
|
||||||
self.elements.append(elem)
|
self.buf.append(raw)
|
||||||
|
|
||||||
|
def finalize(self):
|
||||||
|
self.instructions = ''.join(self.buf)
|
||||||
|
del self.buf
|
||||||
|
|
||||||
WORD, FLAG = 0, 1
|
WORD, FLAG = 0, 1
|
||||||
scanner = re.Scanner([
|
scanner = re.Scanner([
|
||||||
@ -109,48 +114,46 @@ class Fields(object):
|
|||||||
setattr(self, '%s_fields' % f, [])
|
setattr(self, '%s_fields' % f, [])
|
||||||
|
|
||||||
for field in self.fields:
|
for field in self.fields:
|
||||||
|
field.finalize()
|
||||||
if field.instructions:
|
if field.instructions:
|
||||||
name = field.instructions[0][0]
|
func = parsers.get(field.name, None)
|
||||||
func = parsers.get(name, None)
|
|
||||||
if func is not None:
|
if func is not None:
|
||||||
func(field, field_parsers[name], log)
|
func(field, field_parsers[field.name], log)
|
||||||
|
|
||||||
def parse_hyperlink(self, field, parse_func, log):
|
def parse_hyperlink(self, field, parse_func, log):
|
||||||
# Parse hyperlink fields
|
# Parse hyperlink fields
|
||||||
if len(field.instructions) == 1:
|
hl = parse_func(field.instructions, log)
|
||||||
hl = parse_func(field.instructions[0][1], log)
|
if hl:
|
||||||
if hl:
|
if 'target' in hl and hl['target'] is None:
|
||||||
if 'target' in hl and hl['target'] is None:
|
hl['target'] = '_blank'
|
||||||
hl['target'] = '_blank'
|
all_runs = []
|
||||||
all_runs = []
|
current_runs = []
|
||||||
current_runs = []
|
# We only handle spans in a single paragraph
|
||||||
# We only handle spans in a single paragraph
|
# being wrapped in <a>
|
||||||
# being wrapped in <a>
|
for x in field.contents:
|
||||||
for x in field.contents:
|
if x.tag.endswith('}p'):
|
||||||
if x.tag.endswith('}p'):
|
if current_runs:
|
||||||
if current_runs:
|
all_runs.append(current_runs)
|
||||||
all_runs.append(current_runs)
|
current_runs = []
|
||||||
current_runs = []
|
elif x.tag.endswith('}r'):
|
||||||
elif x.tag.endswith('}r'):
|
current_runs.append(x)
|
||||||
current_runs.append(x)
|
if current_runs:
|
||||||
if current_runs:
|
all_runs.append(current_runs)
|
||||||
all_runs.append(current_runs)
|
for runs in all_runs:
|
||||||
for runs in all_runs:
|
self.hyperlink_fields.append((hl, runs))
|
||||||
self.hyperlink_fields.append((hl, runs))
|
|
||||||
|
|
||||||
def parse_xe(self, field, parse_func, log):
|
def parse_xe(self, field, parse_func, log):
|
||||||
# Parse XE fields
|
# Parse XE fields
|
||||||
xe = parse_func(field.instructions[0][1], log) # TODO: Handle field with multiple instructions
|
xe = parse_func(field.instructions, log) # TODO: Handle field with multiple instructions
|
||||||
if xe:
|
if xe:
|
||||||
# TODO: parse the field contents
|
# TODO: parse the field contents
|
||||||
self.xe_fields.append(xe)
|
self.xe_fields.append(xe)
|
||||||
|
|
||||||
def parse_index(self, field, parse_func, log):
|
def parse_index(self, field, parse_func, log):
|
||||||
# Parse Index fields
|
# Parse Index fields
|
||||||
if len(field.instructions):
|
idx = parse_func(field.instructions, log)
|
||||||
idx = parse_func(field.instructions[0][1], log)
|
# TODO: parse the field contents
|
||||||
# TODO: parse the field contents
|
self.index_fields.append(idx)
|
||||||
self.index_fields.append(idx)
|
|
||||||
|
|
||||||
def test_parse_fields():
|
def test_parse_fields():
|
||||||
import unittest
|
import unittest
|
||||||
|
Loading…
x
Reference in New Issue
Block a user