PML Input: Make footnotes and sidebars display better and add return link.

This commit is contained in:
John Schember 2009-12-05 15:41:50 -05:00
parent 16cfdf8ea8
commit 3263a8c3ba
2 changed files with 110 additions and 46 deletions

View File

@ -103,7 +103,7 @@ class Reader132(FormatReader):
return self.decompress_text(number) return self.decompress_text(number)
def extract_content(self, output_dir): def extract_content(self, output_dir):
from calibre.ebooks.pml.pmlconverter import footnote_sidebar_to_html from calibre.ebooks.pml.pmlconverter import footnote_to_html, sidebar_to_html
from calibre.ebooks.pml.pmlconverter import pml_to_html from calibre.ebooks.pml.pmlconverter import pml_to_html
output_dir = os.path.abspath(output_dir) output_dir = os.path.abspath(output_dir)
@ -127,18 +127,14 @@ class Reader132(FormatReader):
footnoteids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding)) footnoteids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.footnote_offset).decode('cp1252' if self.encoding is None else self.encoding))
for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_count)): for fid, i in enumerate(range(self.header_record.footnote_offset + 1, self.header_record.footnote_offset + self.header_record.footnote_count)):
self.log.debug('Extracting footnote page %i' % i) self.log.debug('Extracting footnote page %i' % i)
html += '<dl>' html += footnote_to_html(footnoteids[fid], self.decompress_text(i))
html += footnote_sidebar_to_html(footnoteids[fid], self.decompress_text(i))
html += '</dl>'
if self.header_record.sidebar_count > 0: if self.header_record.sidebar_count > 0:
html += '<br /><h1>%s</h1>' % _('Sidebar') html += '<br /><h1>%s</h1>' % _('Sidebar')
sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding)) sidebarids = re.findall('\w+(?=\x00)', self.section_data(self.header_record.sidebar_offset).decode('cp1252' if self.encoding is None else self.encoding))
for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_count)): for sid, i in enumerate(range(self.header_record.sidebar_offset + 1, self.header_record.sidebar_offset + self.header_record.sidebar_count)):
self.log.debug('Extracting sidebar page %i' % i) self.log.debug('Extracting sidebar page %i' % i)
html += '<dl>' html += sidebar_to_html(sidebarids[sid], self.decompress_text(i))
html += footnote_sidebar_to_html(sidebarids[sid], self.decompress_text(i))
html += '</dl>'
html += '</body></html>' html += '</body></html>'

View File

@ -30,6 +30,7 @@ class PML_HTMLizer(object):
'h5', 'h5',
'h6', 'h6',
'a', 'a',
'ra',
'c', 'c',
'r', 'r',
't', 't',
@ -37,15 +38,24 @@ class PML_HTMLizer(object):
'l', 'l',
'k', 'k',
'T', 'T',
'Fn', 'FN',
'Sd', 'SB',
'FS'
] ]
STATES_VALUE_REQ = [ STATES_VALUE_REQ = [
'a', 'a',
'T', 'T',
'FS' ]
STATES_VALUE_REQ_2 = [
'ra',
'FN',
'SB',
]
STATES_CLOSE_VALUE_REQ = [
'FN',
'SB',
] ]
STATES_TAGS = { STATES_TAGS = {
@ -57,7 +67,8 @@ class PML_HTMLizer(object):
'h6': ('<h6>', '</h6>'), 'h6': ('<h6>', '</h6>'),
'sp': ('<sup>', '</sup>'), 'sp': ('<sup>', '</sup>'),
'sb': ('<sub>', '</sub>'), 'sb': ('<sub>', '</sub>'),
'a': ('<a href="%s">', '</a>'), 'a': ('<a href="#%s">', '</a>'),
'ra': ('<span id="r%s"></span><a href="#%s">', '</a>'),
'c': ('<div style="text-align: center; margin: auto;">', '</div>'), 'c': ('<div style="text-align: center; margin: auto;">', '</div>'),
'r': ('<div style="text-align: right;">', '</div>'), 'r': ('<div style="text-align: right;">', '</div>'),
't': ('<div style="margin-left: 5%;">', '</div>'), 't': ('<div style="margin-left: 5%;">', '</div>'),
@ -68,7 +79,8 @@ class PML_HTMLizer(object):
'b': ('<span style="font-weight: bold;">', '</span>'), 'b': ('<span style="font-weight: bold;">', '</span>'),
'l': ('<span style="font-size: 150%;">', '</span>'), 'l': ('<span style="font-size: 150%;">', '</span>'),
'k': ('<span style="font-size: 75%;">', '</span>'), 'k': ('<span style="font-size: 75%;">', '</span>'),
'FS': ('<div id="%s">', '</div>'), 'FN': ('<br /><br style="page-break-after: always;" /><div id="fn-%s"><dl><dt>%s</dt><dd><p>', '</p></dd></dl><small><a href="#rfn-%s">return</a></small></div>'),
'SB': ('<br /><br style="page-break-after: always;" /><div id="sb-%s"><dl><dt><dt>%s</dt><dd><p>', '</p></dd></dl><small><a href="#rsb-%s">return</a></small></div>'),
} }
CODE_STATES = { CODE_STATES = {
@ -93,14 +105,20 @@ class PML_HTMLizer(object):
'B': 'b', 'B': 'b',
'l': 'l', 'l': 'l',
'k': 'k', 'k': 'k',
'Fn': 'a', 'Fn': 'ra',
'Sd': 'a', 'Sd': 'ra',
'FN': 'FS', 'FN': 'FN',
'SB': 'FS', 'SB': 'SB',
} }
LINK_STATES = [
'a',
'ra',
]
BLOCK_STATES = [ BLOCK_STATES = [
'a', 'a',
'ra',
'h1', 'h1',
'h2', 'h2',
'h3', 'h3',
@ -116,7 +134,8 @@ class PML_HTMLizer(object):
'r', 'r',
't', 't',
'T', 'T',
'FS', 'FN',
'SB',
] ]
SPAN_STATES = [ SPAN_STATES = [
@ -144,8 +163,8 @@ class PML_HTMLizer(object):
pml = re.sub(r'(?mus)^[ ]*$', '', pml) pml = re.sub(r'(?mus)^[ ]*$', '', pml)
# Footnotes and Sidebars # Footnotes and Sidebars
pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="fns-%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml) pml = re.sub(r'(?mus)<footnote\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</footnote>', lambda match: '\\FN="%s"%s\\FN' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="fns-%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml) pml = re.sub(r'(?mus)<sidebar\s+id="(?P<target>.+?)">\s*(?P<text>.*?)\s*</sidebar>', lambda match: '\\SB="%s"%s\\SB' % (match.group('target'), match.group('text')) if match.group('text') else '', pml)
# Convert &'s into entities so &amp; in the text doesn't get turned into # Convert &'s into entities so &amp; in the text doesn't get turned into
# &. It will display as &amp; # &. It will display as &amp;
@ -181,10 +200,12 @@ class PML_HTMLizer(object):
for key, val in self.state.items(): for key, val in self.state.items():
if val[0]: if val[0]:
if key not in self.STATES_VALUE_REQ: if key in self.STATES_VALUE_REQ:
start += self.STATES_TAGS[key][0]
else:
start += self.STATES_TAGS[key][0] % val[1] start += self.STATES_TAGS[key][0] % val[1]
elif key in self.STATES_VALUE_REQ_2:
start += self.STATES_TAGS[key][0] % (val[1], val[1])
else:
start += self.STATES_TAGS[key][0]
return u'<p>%s' % start return u'<p>%s' % start
@ -206,7 +227,10 @@ class PML_HTMLizer(object):
else: else:
other.append(key) other.append(key)
for key in span+div+other: for key in span+div+other:
end += self.STATES_TAGS[key][1] if key in self.STATES_CLOSE_VALUE_REQ:
end += self.STATES_TAGS[key][1] % self.state[key][1]
else:
end += self.STATES_TAGS[key][1]
return u'%s</p>' % end return u'%s</p>' % end
@ -239,11 +263,17 @@ class PML_HTMLizer(object):
text = u'' text = u''
if self.state[code][0]: if self.state[code][0]:
text = self.STATES_TAGS[code][1] if code in self.STATES_CLOSE_VALUE_REQ:
text = self.STATES_TAGS[code][1] % self.state[code][1]
else:
text = self.STATES_TAGS[code][1]
else: else:
if code in self.STATES_VALUE_REQ: if code in self.STATES_VALUE_REQ or code in self.STATES_VALUE_REQ_2:
val = self.code_value(stream) val = self.code_value(stream)
text += self.STATES_TAGS[code][0] % val if code in self.STATES_VALUE_REQ:
text = self.STATES_TAGS[code][0] % val
else:
text = self.STATES_TAGS[code][0] % (val, val)
self.state[code][1] = val self.state[code][1] = val
else: else:
text = self.STATES_TAGS[code][0] text = self.STATES_TAGS[code][0]
@ -258,7 +288,10 @@ class PML_HTMLizer(object):
# Close all. # Close all.
for c in self.SPAN_STATES+self.DIV_STATES: for c in self.SPAN_STATES+self.DIV_STATES:
if self.state[c][0]: if self.state[c][0]:
text += self.STATES_TAGS[c][1] if c in self.STATES_CLOSE_VALUE_REQ:
text += self.STATES_TAGS[c][1] % self.state[c][1]
else:
text += self.STATES_TAGS[c][1]
# Reopen the tags based on state. # Reopen the tags based on state.
for c in self.DIV_STATES+self.SPAN_STATES: for c in self.DIV_STATES+self.SPAN_STATES:
if code == c: if code == c:
@ -266,6 +299,8 @@ class PML_HTMLizer(object):
if self.state[c][0]: if self.state[c][0]:
if c in self.STATES_VALUE_REQ: if c in self.STATES_VALUE_REQ:
text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1] text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1]
elif c in self.STATES_VALUE_REQ_2:
text += self.STATES_TAGS[self.CODE_STATES[c]][0] % (self.state[c][1], self.state[c][1])
else: else:
text += self.STATES_TAGS[c][0] text += self.STATES_TAGS[c][0]
# Open code. # Open code.
@ -273,11 +308,17 @@ class PML_HTMLizer(object):
# Close all spans. # Close all spans.
for c in self.SPAN_STATES: for c in self.SPAN_STATES:
if self.state[c][0]: if self.state[c][0]:
text += self.STATES_TAGS[c][1] if c in self.STATES_CLOSE_VALUE_REQ:
text += self.STATES_TAGS[c][1] % self.state[c][1]
else:
text += self.STATES_TAGS[c][1]
# Process the code # Process the code
if code in self.STATES_VALUE_REQ: if code in self.STATES_VALUE_REQ or code in self.STATES_VALUE_REQ_2:
val = self.code_value(stream) val = self.code_value(stream)
text += self.STATES_TAGS[code][0] % val if code in self.STATES_VALUE_REQ:
text += self.STATES_TAGS[code][0] % val
else:
text += self.STATES_TAGS[code][0] % (val, val)
self.state[code][1] = val self.state[code][1] = val
else: else:
text += self.STATES_TAGS[code][0] text += self.STATES_TAGS[code][0]
@ -286,6 +327,8 @@ class PML_HTMLizer(object):
if self.state[c][0]: if self.state[c][0]:
if c in self.STATES_VALUE_REQ: if c in self.STATES_VALUE_REQ:
text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1] text += self.STATES_TAGS[self.CODE_STATES[c]][0] % self.state[c][1]
elif c in self.STATES_VALUE_REQ_2:
text += self.STATES_TAGS[self.CODE_STATES[c]][0] % (self.state[c][1], self.state[c][1])
else: else:
text += self.STATES_TAGS[c][0] text += self.STATES_TAGS[c][0]
@ -299,7 +342,10 @@ class PML_HTMLizer(object):
# Close all spans # Close all spans
for c in self.SPAN_STATES: for c in self.SPAN_STATES:
if self.state[c][0]: if self.state[c][0]:
text += self.STATES_TAGS[c][1] if c in self.STATES_CLOSE_VALUE_REQ:
text += self.STATES_TAGS[c][1] % self.state[c][1]
else:
text += self.STATES_TAGS[c][1]
# Re-open the spans based on state except for code which will be # Re-open the spans based on state except for code which will be
# left closed. # left closed.
for c in self.SPAN_STATES: for c in self.SPAN_STATES:
@ -308,13 +354,18 @@ class PML_HTMLizer(object):
if self.state[c][0]: if self.state[c][0]:
if c in self.STATES_VALUE_REQ: if c in self.STATES_VALUE_REQ:
text += self.STATES_TAGS[code][0] % self.state[c][1] text += self.STATES_TAGS[code][0] % self.state[c][1]
elif c in self.STATES_VALUE_REQ_2:
text += self.STATES_TAGS[code][0] % (self.state[c][1], self.state[c][1])
else: else:
text += self.STATES_TAGS[c][0] text += self.STATES_TAGS[c][0]
# Open code. # Open code.
else: else:
if code in self.STATES_VALUE_REQ: if code in self.STATES_VALUE_REQ or code in self.STATES_VALUE_REQ_2:
val = self.code_value(stream) val = self.code_value(stream)
text += self.STATES_TAGS[code][0] % val if code in self.STATES_VALUE_REQ:
text += self.STATES_TAGS[code][0] % val
else:
text += self.STATES_TAGS[code][0] % (val, val)
self.state[code][1] = val self.state[code][1] = val
else: else:
text += self.STATES_TAGS[code][0] text += self.STATES_TAGS[code][0]
@ -327,19 +378,29 @@ class PML_HTMLizer(object):
# Close all spans # Close all spans
for c in self.SPAN_STATES: for c in self.SPAN_STATES:
if self.state[c][0]: if self.state[c][0]:
text += self.STATES_TAGS[c][1] if c in self.STATES_CLOSE_VALUE_REQ:
text += self.STATES_TAGS[c][1] % self.state[c][1]
else:
text += self.STATES_TAGS[c][1]
# Process the code # Process the code
if self.state[code][0]: if self.state[code][0]:
# Close tag # Close tag
text += self.STATES_TAGS[code][1] if code in self.STATES_CLOSE_VALUE_REQ:
text += self.STATES_TAGS[code][1] % self.state[code][1]
else:
text += self.STATES_TAGS[code][1]
else: else:
# Open tag # Open tag
if code in self.STATES_VALUE_REQ: if code in self.STATES_VALUE_REQ or code in self.STATES_VALUE_REQ_2:
val = self.code_value(stream) val = self.code_value(stream)
if code in self.LINK_STATES:
val = val.lstrip('#')
if pre: if pre:
val = '#%s-%s' % (pre, val) val = '%s-%s' % (pre, val)
text += self.STATES_TAGS[code][0] % val if code in self.STATES_VALUE_REQ:
text += self.STATES_TAGS[code][0] % val
else:
text += self.STATES_TAGS[code][0] % (val, val)
self.state[code][1] = val self.state[code][1] = val
else: else:
text += self.STATES_TAGS[code][0] text += self.STATES_TAGS[code][0]
@ -349,6 +410,8 @@ class PML_HTMLizer(object):
if self.state[c][0]: if self.state[c][0]:
if c in self.STATES_VALUE_REQ: if c in self.STATES_VALUE_REQ:
text += self.STATES_TAGS[code][0] % self.state[c][1] text += self.STATES_TAGS[code][0] % self.state[c][1]
elif c in self.STATES_VALUE_REQ_2:
text += self.STATES_TAGS[code][0] % (self.state[c][1], self.state[c][1])
else: else:
text += self.STATES_TAGS[c][0] text += self.STATES_TAGS[c][0]
@ -432,13 +495,13 @@ class PML_HTMLizer(object):
elif c in 'FSX': elif c in 'FSX':
l = line.read(1) l = line.read(1)
if '%s%s' % (c, l) == 'Fn': if '%s%s' % (c, l) == 'Fn':
text = self.process_code('Fn', line, 'fns') text = self.process_code('Fn', line, 'fn')
elif '%s%s' % (c, l) == 'FN': elif '%s%s' % (c, l) == 'FN':
text = self.process_code('FN', line) text = self.process_code('FN', line)
elif '%s%s' % (c, l) == 'SB': elif '%s%s' % (c, l) == 'SB':
text = self.process_code('SB', line) text = self.process_code('SB', line)
elif '%s%s' % (c, l) == 'Sd': elif '%s%s' % (c, l) == 'Sd':
text = self.process_code('Sd', line, 'fns') text = self.process_code('Sd', line, 'sb')
else: else:
text = self.process_code('%s%s' % (c, l), line) text = self.process_code('%s%s' % (c, l), line)
elif c == 'm': elif c == 'm':
@ -496,8 +559,13 @@ def pml_to_html(pml):
hizer = PML_HTMLizer() hizer = PML_HTMLizer()
return hizer.parse_pml(pml) return hizer.parse_pml(pml)
def footnote_sidebar_to_html(id, pml): def footnote_sidebar_to_html(pre_id, id, pml):
if id.startswith('\x01'): id = id.strip('\x01')
id = id[2:] html = '<br /><br style="page-break-after: always;" /><div id="%s-%s"><dl><dt>%s</dt><dd><p>%s</p></dd></dl><small><a href="#r%s-%s">return</a></small></div>' % (pre_id, id, id, pml_to_html(pml), pre_id, id)
html = '<div id="fns-%s"><dt>%s</dt></div><dd>%s</dd>' % (id, id, pml_to_html(pml))
return html return html
def footnote_to_html(id, pml):
return footnote_sidebar_to_html('fn', id, pml)
def sidebar_to_html(id, pml):
return footnote_sidebar_to_html('sb', id, pml)