Markdown Output: Fix issues with pre tags.

This commit is contained in:
John Schember 2011-09-04 12:56:01 -04:00
parent c3a100fc59
commit adcf2a0cb6

View File

@ -55,10 +55,15 @@ class MarkdownMLizer(OEB2HTML):
def tidy_up(self, text): def tidy_up(self, text):
# Remove blank space from beginning of paragraph. # Remove blank space from beginning of paragraph.
text = re.sub('(?msu)^[ ]{1,3}', '', text) text = re.sub('(?msu)^[ ]{1,3}', '', text)
# pre has 4 spaces. We trimmed 3 so anything with a space left is a pre.
text = re.sub('(?msu)^[ ]', '    ', text)
# Remove spaces from blank lines. # Remove spaces from blank lines.
text = re.sub('(?msu)^[ ]+$', '', text) text = re.sub('(?msu)^[ ]+$', '', text)
# Reduce blank lines # Reduce blank lines
text = re.sub('(?msu)\n{7,}', '\n' * 6, text) text = re.sub('(?msu)\n{7,}', '\n' * 6, text)
# Remove blank lines at beginning and end of document. # Remove blank lines at beginning and end of document.
text = re.sub('^\s*', '', text) text = re.sub('^\s*', '', text)
text = re.sub('\s*$', '\n\n', text) text = re.sub('\s*$', '\n\n', text)
@ -80,6 +85,12 @@ class MarkdownMLizer(OEB2HTML):
def prepare_string_for_markdown(self, txt): def prepare_string_for_markdown(self, txt):
txt = re.sub(r'([\\`*_{}\[\]()#+!])', r'\\\1', txt) txt = re.sub(r'([\\`*_{}\[\]()#+!])', r'\\\1', txt)
return txt return txt
def prepare_string_for_pre(self, txt):
    '''
    Indent every line of txt so Markdown renders it as a code block.

    Markdown only treats a line as preformatted text when it is
    indented by four spaces. tidy_up removes one to three leading
    spaces from every line, so a narrower indent would not survive it.

    The text is split with str.splitlines(), so all line terminators
    come back as a single newline character and a trailing newline is
    dropped. An empty string is returned unchanged.
    '''
    return '\n'.join('    ' + line for line in txt.splitlines())
def dump_text(self, elem, stylizer): def dump_text(self, elem, stylizer):
''' '''
@ -97,7 +108,7 @@ class MarkdownMLizer(OEB2HTML):
return [''] return ['']
# Setup our variables. # Setup our variables.
text = [''] text = []
style = stylizer.style(elem) style = stylizer.style(elem)
tags = [] tags = []
tag = barename(elem.tag) tag = barename(elem.tag)
@ -143,29 +154,41 @@ class MarkdownMLizer(OEB2HTML):
self.blockquotes += 1 self.blockquotes += 1
tags.append('>') tags.append('>')
text.append('> ' * self.blockquotes) text.append('> ' * self.blockquotes)
elif tag in ('code', 'pre'): elif tag == 'code':
self.in_pre = True if not self.in_pre:
text.append(' ') text.append('`')
tags.append('`')
elif tag == 'pre':
if not self.in_pre:
text.append('\n')
tags.append('pre')
self.in_pre = True
elif tag == 'hr': elif tag == 'hr':
text.append('\n* * *') text.append('\n* * *')
tags.append('\n') tags.append('\n')
elif tag == 'a': elif tag == 'a':
# Only write links with absolute (external) urls. # Only write links with absolute (external) urls.
if attribs.has_key('href') and '://' in attribs['href']: if self.opts.keep_links and attribs.has_key('href') and '://' in attribs['href']:
title = '' title = ''
if attribs.has_key('title'): if attribs.has_key('title'):
title = ' "' + attribs['title'] + '" ' title = ' "' + attribs['title'] + '"'
remove_space = self.remove_space_after_newline
title = self.remove_newlines(title)
self.remove_space_after_newline = remove_space
text.append('[') text.append('[')
tags.append('](' + attribs['href'] + title + ')') tags.append('](' + attribs['href'] + title + ')')
elif tag == 'img': elif tag == 'img':
if self.opts.keep_image_references: if self.opts.keep_image_references:
txt = '!' txt = '!'
if attribs.has_key('alt'): if attribs.has_key('alt'):
txt += '[' + attribs['alt'] + ']' remove_space = self.remove_space_after_newline
txt += '[' + self.remove_newlines(attribs['alt']) + ']'
self.remove_space_after_newline = remove_space
txt += '(' + attribs['src'] + ')' txt += '(' + attribs['src'] + ')'
text.append(txt) text.append(txt)
elif tag in ('ol', 'ul'): elif tag in ('ol', 'ul'):
self.list.append({'name': tag, 'num': 0}) self.list.append({'name': tag, 'num': 0})
tags.append(tag)
elif tag == 'li': elif tag == 'li':
if self.list: if self.list:
li = self.list[-1] li = self.list[-1]
@ -182,7 +205,9 @@ class MarkdownMLizer(OEB2HTML):
# Process tags that contain text. # Process tags that contain text.
if hasattr(elem, 'text') and elem.text: if hasattr(elem, 'text') and elem.text:
txt = elem.text txt = elem.text
if not self.in_pre: if self.in_pre:
txt = self.prepare_string_for_pre(txt)
else:
txt = self.prepare_string_for_markdown(self.remove_newlines(txt)) txt = self.prepare_string_for_markdown(self.remove_newlines(txt))
text.append(txt) text.append(txt)
@ -193,16 +218,12 @@ class MarkdownMLizer(OEB2HTML):
# Close all open tags. # Close all open tags.
tags.reverse() tags.reverse()
for t in tags: for t in tags:
if t in ('pre', 'ul', 'ol', 'li', '>', 'block'): if t in ('pre', 'ul', 'ol', 'li', '>'):
if t == 'pre': if t == 'pre':
self.in_pre = False self.in_pre = False
text.append('\n')
elif t == '>': elif t == '>':
self.blockquotes -= 1 self.blockquotes -= 1
elif t == 'block':
if self.style_bold:
text.append('**')
if self.style_italic:
text.append('*')
elif t in ('ul', 'ol'): elif t in ('ul', 'ol'):
if self.list: if self.list:
self.list.pop() self.list.pop()
@ -224,7 +245,9 @@ class MarkdownMLizer(OEB2HTML):
# Add the text that is outside of the tag. # Add the text that is outside of the tag.
if hasattr(elem, 'tail') and elem.tail: if hasattr(elem, 'tail') and elem.tail:
tail = elem.tail tail = elem.tail
if not self.in_pre: if self.in_pre:
tail = self.prepare_string_for_pre(tail)
else:
tail = self.prepare_string_for_markdown(self.remove_newlines(tail)) tail = self.prepare_string_for_markdown(self.remove_newlines(tail))
text.append(tail) text.append(tail)