mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
PML parser fixes.
This commit is contained in:
parent
8d364272ff
commit
c254f63a07
@ -45,7 +45,7 @@ class PMLInput(InputFormatPlugin):
|
|||||||
ienc = self.options.input_encoding
|
ienc = self.options.input_encoding
|
||||||
|
|
||||||
self.log.debug('Converting PML to HTML...')
|
self.log.debug('Converting PML to HTML...')
|
||||||
hizer = PML_HTMLizer(close_all)
|
hizer = PML_HTMLizer()
|
||||||
html = hizer.parse_pml(pml_stream.read().decode(ienc), html_path)
|
html = hizer.parse_pml(pml_stream.read().decode(ienc), html_path)
|
||||||
html_stream.write('<html><head><title /></head><body>%s</body></html>' % html.encode('utf-8', 'replace'))
|
html_stream.write('<html><head><title /></head><body>%s</body></html>' % html.encode('utf-8', 'replace'))
|
||||||
|
|
||||||
|
@ -62,11 +62,11 @@ class PML_HTMLizer(object):
|
|||||||
'r': ('<div style="text-align: right;">', '</div>'),
|
'r': ('<div style="text-align: right;">', '</div>'),
|
||||||
't': ('<div style="margin-left: 5%;">', '</div>'),
|
't': ('<div style="margin-left: 5%;">', '</div>'),
|
||||||
'T': ('<div style="margin-left: %s;">', '</div>'),
|
'T': ('<div style="margin-left: %s;">', '</div>'),
|
||||||
'i': ('<span style="font-style : italic;">', '</span>'),
|
'i': ('<span style="font-style: italic;">', '</span>'),
|
||||||
'u': ('<span style="text-decoration : underline;">', '</span>'),
|
'u': ('<span style="text-decoration: underline;">', '</span>'),
|
||||||
'd': ('<span style="text-decoration: line-through;">', '</span>'),
|
'd': ('<span style="text-decoration: line-through;">', '</span>'),
|
||||||
'b': ('<span style="font-weight: bold;">', '</span>'),
|
'b': ('<span style="font-weight: bold;">', '</span>'),
|
||||||
'l': ('<span style="font-size: 150%">', '</span>'),
|
'l': ('<span style="font-size: 150%;">', '</span>'),
|
||||||
'FS': ('<div id="%s">', '</div>'),
|
'FS': ('<div id="%s">', '</div>'),
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -113,8 +113,7 @@ class PML_HTMLizer(object):
|
|||||||
'b',
|
'b',
|
||||||
]
|
]
|
||||||
|
|
||||||
def __init__(self, close_all):
|
def __init__(self):
|
||||||
self.close_all = close_all
|
|
||||||
self.state = {}
|
self.state = {}
|
||||||
self.toc = TOC()
|
self.toc = TOC()
|
||||||
self.file_name = ''
|
self.file_name = ''
|
||||||
@ -156,6 +155,7 @@ class PML_HTMLizer(object):
|
|||||||
html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html)
|
html = re.sub(r'(?u)%s\s*%s' % (open % '.*?', close), '', html)
|
||||||
else:
|
else:
|
||||||
html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html)
|
html = re.sub(r'(?u)%s\s*%s' % (open, close), '', html)
|
||||||
|
html = re.sub(r'<p>\s*</p>', '', html)
|
||||||
return html
|
return html
|
||||||
|
|
||||||
def start_line(self):
|
def start_line(self):
|
||||||
@ -173,11 +173,22 @@ class PML_HTMLizer(object):
|
|||||||
def end_line(self):
|
def end_line(self):
|
||||||
end = u''
|
end = u''
|
||||||
|
|
||||||
|
div = []
|
||||||
|
span = []
|
||||||
|
other = []
|
||||||
|
|
||||||
for key, val in self.state.items():
|
for key, val in self.state.items():
|
||||||
if val[0]:
|
if val[0]:
|
||||||
if key == 'T':
|
if key == 'T':
|
||||||
self.state['T'][0] = False
|
self.state['T'][0] = False
|
||||||
end += self.STATES_TAGS[key][1]
|
elif key in self.DIV_STATES:
|
||||||
|
div.append(key)
|
||||||
|
elif key in self.SPAN_STATES:
|
||||||
|
span.append(key)
|
||||||
|
else:
|
||||||
|
other.append(key)
|
||||||
|
for key in span+div+other:
|
||||||
|
end += self.STATES_TAGS[key][1]
|
||||||
|
|
||||||
return u'%s</p>' % end
|
return u'%s</p>' % end
|
||||||
|
|
||||||
@ -214,12 +225,6 @@ class PML_HTMLizer(object):
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
def process_code_div_span(self, code, stream):
|
def process_code_div_span(self, code, stream):
|
||||||
if self.close_all:
|
|
||||||
return self.process_code_div_span_call(code, stream)
|
|
||||||
else:
|
|
||||||
return self.process_code_div_span_ind(code, stream)
|
|
||||||
|
|
||||||
def process_code_div_span_ind(self, code, stream):
|
|
||||||
text = u''
|
text = u''
|
||||||
ds = []
|
ds = []
|
||||||
|
|
||||||
@ -246,47 +251,24 @@ class PML_HTMLizer(object):
|
|||||||
else:
|
else:
|
||||||
text += self.STATES_TAGS[c][0]
|
text += self.STATES_TAGS[c][0]
|
||||||
else:
|
else:
|
||||||
if code in self.STATES_VALUE_REQ:
|
# Close all spans if code is a div
|
||||||
val = self.code_value(stream)
|
for c in ss:
|
||||||
text = self.STATES_TAGS[code][0] % val
|
|
||||||
self.state[code][1] = val
|
|
||||||
else:
|
|
||||||
text = self.STATES_TAGS[code][0]
|
|
||||||
|
|
||||||
self.state[code][0] = not self.state[code][0]
|
|
||||||
|
|
||||||
return text
|
|
||||||
|
|
||||||
def process_code_div_span_call(self, code, stream):
|
|
||||||
text = u''
|
|
||||||
divs = self.DIV_STATES[:]
|
|
||||||
spans = self.SPAN_STATES[:]
|
|
||||||
|
|
||||||
code = self.CODE_STATES[code]
|
|
||||||
|
|
||||||
if self.state[code][0]:
|
|
||||||
# Close all divs then spans.
|
|
||||||
for c in spans+divs:
|
|
||||||
if self.state[c][0]:
|
if self.state[c][0]:
|
||||||
text += self.STATES_TAGS[c][1]
|
text += self.STATES_TAGS[c][1]
|
||||||
# Reopen the based on state. Open divs then spans
|
# Process the code
|
||||||
if code in self.DIV_STATES:
|
if code in self.STATES_VALUE_REQ:
|
||||||
del divs[divs.index(code)]
|
val = self.code_value(stream)
|
||||||
if code in self.SPAN_STATES:
|
text += self.STATES_TAGS[code][0] % val
|
||||||
del spans[spans.index(code)]
|
self.state[code][1] = val
|
||||||
for c in divs+spans:
|
else:
|
||||||
|
text += self.STATES_TAGS[code][0]
|
||||||
|
# Re-open all spans if code was a div based on state
|
||||||
|
for c in ss:
|
||||||
if self.state[c][0]:
|
if self.state[c][0]:
|
||||||
if c in self.STATES_VALUE_REQ:
|
if c in self.STATES_VALUE_REQ:
|
||||||
text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1]
|
text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1]
|
||||||
else:
|
else:
|
||||||
text += self.STATES_TAGS[c][0]
|
text += self.STATES_TAGS[c][0]
|
||||||
else:
|
|
||||||
if code in self.STATES_VALUE_REQ:
|
|
||||||
val = self.code_value(stream)
|
|
||||||
text = self.STATES_TAGS[code][0] % val
|
|
||||||
self.state[code][1] = val
|
|
||||||
else:
|
|
||||||
text = self.STATES_TAGS[code][0]
|
|
||||||
|
|
||||||
self.state[code][0] = not self.state[code][0]
|
self.state[code][0] = not self.state[code][0]
|
||||||
|
|
||||||
@ -294,19 +276,32 @@ class PML_HTMLizer(object):
|
|||||||
|
|
||||||
def code_value(self, stream):
|
def code_value(self, stream):
|
||||||
value = u''
|
value = u''
|
||||||
open = False
|
# state 0 is before =
|
||||||
|
# state 1 is before the first "
|
||||||
|
# state 2 is before the second "
|
||||||
|
state = 0
|
||||||
|
loc = stream.tell()
|
||||||
|
|
||||||
c = stream.read(1)
|
c = stream.read(1)
|
||||||
while c != '':
|
while c != '':
|
||||||
if open and c != '"':
|
if state == 0:
|
||||||
value += c
|
if c == '=':
|
||||||
if c == '"':
|
state = 1
|
||||||
if not open:
|
elif state == 1:
|
||||||
open = True
|
if c == '"':
|
||||||
else:
|
state = 2
|
||||||
|
elif state == 2:
|
||||||
|
if c == '"':
|
||||||
|
state = 3
|
||||||
break
|
break
|
||||||
|
else:
|
||||||
|
value += c
|
||||||
c = stream.read(1)
|
c = stream.read(1)
|
||||||
|
|
||||||
|
if state != 3:
|
||||||
|
stream.seek(loc)
|
||||||
|
value = u''
|
||||||
|
|
||||||
return value.strip()
|
return value.strip()
|
||||||
|
|
||||||
def parse_pml(self, pml, file_name=''):
|
def parse_pml(self, pml, file_name=''):
|
||||||
@ -321,13 +316,15 @@ class PML_HTMLizer(object):
|
|||||||
self.state[s] = [False, ''];
|
self.state[s] = [False, ''];
|
||||||
|
|
||||||
for line in pml.splitlines():
|
for line in pml.splitlines():
|
||||||
if not line:
|
|
||||||
continue
|
|
||||||
parsed = []
|
parsed = []
|
||||||
empty = True
|
empty = True
|
||||||
|
|
||||||
|
line = self.prepare_line(line)
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
|
||||||
# Must use StringIO, cStringIO does not support unicode
|
# Must use StringIO, cStringIO does not support unicode
|
||||||
line = StringIO.StringIO(self.prepare_line(line))
|
line = StringIO.StringIO(line)
|
||||||
parsed.append(self.start_line())
|
parsed.append(self.start_line())
|
||||||
|
|
||||||
c = line.read(1)
|
c = line.read(1)
|
||||||
@ -405,13 +402,8 @@ class PML_HTMLizer(object):
|
|||||||
return self.toc
|
return self.toc
|
||||||
|
|
||||||
|
|
||||||
def pml_to_html(pml, close_all=False):
|
def pml_to_html(pml):
|
||||||
'''
|
hizer = PML_HTMLizer()
|
||||||
close_all will close div all div and span tags when one is closed and then
|
|
||||||
re-open the appropriate ones.
|
|
||||||
'''
|
|
||||||
|
|
||||||
hizer = PML_HTMLizer(close_all)
|
|
||||||
return hizer.parse_pml(pml)
|
return hizer.parse_pml(pml)
|
||||||
|
|
||||||
def footnote_sidebar_to_html(id, pml):
|
def footnote_sidebar_to_html(id, pml):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user