Upgarde to python-markdown 1.7

This commit is contained in:
Kovid Goyal 2008-03-01 18:38:17 +00:00
parent 862f75bf2c
commit e54893d9a8
2 changed files with 179 additions and 134 deletions

View File

@ -1,8 +1,8 @@
#!/usr/bin/env python #!/usr/bin/env python
version = "1.7" version = "1.7"
version_info = (1,7,0,"rc-1") version_info = (1,7,0,"rc-2")
__revision__ = "$Rev: 66 $" __revision__ = "$Rev: 72 $"
""" """
Python-Markdown Python-Markdown
@ -31,7 +31,7 @@ License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
""" """
import re, sys, os, random, codecs import re, sys, codecs
from logging import getLogger, StreamHandler, Formatter, \ from logging import getLogger, StreamHandler, Formatter, \
DEBUG, INFO, WARN, ERROR, CRITICAL DEBUG, INFO, WARN, ERROR, CRITICAL
@ -108,7 +108,7 @@ BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table',
'form', 'fieldset', 'iframe', 'math', 'ins', 'form', 'fieldset', 'iframe', 'math', 'ins',
'del', 'hr', 'hr/', 'style'] 'del', 'hr', 'hr/', 'style']
def is_block_level (tag): def isBlockLevel (tag):
return ( (tag in BLOCK_LEVEL_ELEMENTS) or return ( (tag in BLOCK_LEVEL_ELEMENTS) or
(tag[0] == 'h' and tag[1] in "0123456789") ) (tag[0] == 'h' and tag[1] in "0123456789") )
@ -127,8 +127,7 @@ want. It also adds extra white space when converting DOM to string
ENTITY_NORMALIZATION_EXPRESSIONS = [ (re.compile("&"), "&"), ENTITY_NORMALIZATION_EXPRESSIONS = [ (re.compile("&"), "&"),
(re.compile("<"), "&lt;"), (re.compile("<"), "&lt;"),
(re.compile(">"), "&gt;"), (re.compile(">"), "&gt;")]
(re.compile("\""), "&quot;")]
ENTITY_NORMALIZATION_EXPRESSIONS_SOFT = [ (re.compile("&(?!\#)"), "&amp;"), ENTITY_NORMALIZATION_EXPRESSIONS_SOFT = [ (re.compile("&(?!\#)"), "&amp;"),
(re.compile("<"), "&lt;"), (re.compile("<"), "&lt;"),
@ -406,85 +405,43 @@ class EntityReference:
Preprocessors munge source text before we start doing anything too Preprocessors munge source text before we start doing anything too
complicated. complicated.
Each preprocessor implements a "run" method that takes a pointer to a There are two types of preprocessors: TextPreprocessor and Preprocessor.
list of lines of the document, modifies it as necessary and returns
either the same pointer or a pointer to a new list. Preprocessors
must extend markdown.Preprocessor.
""" """
class Preprocessor: class TextPreprocessor:
'''
TextPreprocessors are run before the text is broken into lines.
Each TextPreprocessor implements a "run" method that takes a pointer to a
text string of the document, modifies it as necessary and returns
either the same pointer or a pointer to a new string.
TextPreprocessors must extend markdown.TextPreprocessor.
'''
def run(self, text):
pass pass
class HeaderPreprocessor (Preprocessor): class Preprocessor:
'''
Preprocessors are run after the text is broken into lines.
""" Each preprocessor implements a "run" method that takes a pointer to a
Replaces underlined headers with hashed headers to avoid list of lines of the document, modifies it as necessary and returns
the nead for lookahead later. either the same pointer or a pointer to a new list.
"""
def run (self, lines): Preprocessors must extend markdown.Preprocessor.
'''
i = -1 def run(self, lines):
while i+1 < len(lines): pass
i = i+1
if not lines[i].strip():
continue
if lines[i].startswith("#"):
lines.insert(i+1, "\n")
if (i+1 <= len(lines)
and lines[i+1]
and lines[i+1][0] in ['-', '=']):
underline = lines[i+1].strip()
if underline == "="*len(underline):
lines[i] = "# " + lines[i].strip()
lines[i+1] = ""
elif underline == "-"*len(underline):
lines[i] = "## " + lines[i].strip()
lines[i+1] = ""
return lines
HEADER_PREPROCESSOR = HeaderPreprocessor()
class LinePreprocessor (Preprocessor):
"""Deals with HR lines (needs to be done before processing lists)"""
blockquote_re = re.compile(r'^(> )+')
def run (self, lines):
for i in range(len(lines)):
prefix = ''
m = self.blockquote_re.search(lines[i])
if m : prefix = m.group(0)
if self._isLine(lines[i][len(prefix):]):
lines[i] = prefix + self.stash.store("<hr />", safe=True)
return lines
def _isLine(self, block):
"""Determines if a block should be replaced with an <:wHR>"""
if block.startswith(" "): return 0 # a code block
text = "".join([x for x in block if not x.isspace()])
if len(text) <= 2:
return 0
for pattern in ['isline1', 'isline2', 'isline3']:
m = RE.regExp[pattern].match(text)
if (m and m.group(1)):
return 1
else:
return 0
LINE_PREPROCESSOR = LinePreprocessor()
class HtmlBlockPreprocessor (Preprocessor): class HtmlBlockPreprocessor(TextPreprocessor):
"""Removes html blocks from self.lines""" """Removes html blocks from the source text and stores it."""
def _get_left_tag(self, block): def _get_left_tag(self, block):
return block[1:].replace(">", " ", 1).split()[0].lower() return block[1:].replace(">", " ", 1).split()[0].lower()
@ -495,7 +452,7 @@ class HtmlBlockPreprocessor (Preprocessor):
def _equal_tags(self, left_tag, right_tag): def _equal_tags(self, left_tag, right_tag):
if left_tag in ['?', '?php', 'div']: # handle PHP, etc. if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
return True return True
if ("/" + left_tag) == right_tag: if ("/" + left_tag) == right_tag:
return True return True
@ -511,7 +468,7 @@ class HtmlBlockPreprocessor (Preprocessor):
return (tag in ['hr', 'hr/']) return (tag in ['hr', 'hr/'])
def run (self, text): def run(self, text):
new_blocks = [] new_blocks = []
text = text.split("\n\n") text = text.split("\n\n")
@ -532,7 +489,7 @@ class HtmlBlockPreprocessor (Preprocessor):
left_tag = self._get_left_tag(block) left_tag = self._get_left_tag(block)
right_tag = self._get_right_tag(left_tag, block) right_tag = self._get_right_tag(left_tag, block)
if not (is_block_level(left_tag) \ if not (isBlockLevel(left_tag) \
or block[1] in ["!", "?", "@", "%"]): or block[1] in ["!", "?", "@", "%"]):
new_blocks.append(block) new_blocks.append(block)
continue continue
@ -581,7 +538,76 @@ class HtmlBlockPreprocessor (Preprocessor):
HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor() HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
class ReferencePreprocessor (Preprocessor): class HeaderPreprocessor(Preprocessor):
"""
Replaces underlined headers with hashed headers to avoid
the nead for lookahead later.
"""
def run (self, lines):
i = -1
while i+1 < len(lines):
i = i+1
if not lines[i].strip():
continue
if lines[i].startswith("#"):
lines.insert(i+1, "\n")
if (i+1 <= len(lines)
and lines[i+1]
and lines[i+1][0] in ['-', '=']):
underline = lines[i+1].strip()
if underline == "="*len(underline):
lines[i] = "# " + lines[i].strip()
lines[i+1] = ""
elif underline == "-"*len(underline):
lines[i] = "## " + lines[i].strip()
lines[i+1] = ""
return lines
HEADER_PREPROCESSOR = HeaderPreprocessor()
class LinePreprocessor(Preprocessor):
"""Deals with HR lines (needs to be done before processing lists)"""
blockquote_re = re.compile(r'^(> )+')
def run (self, lines):
for i in range(len(lines)):
prefix = ''
m = self.blockquote_re.search(lines[i])
if m : prefix = m.group(0)
if self._isLine(lines[i][len(prefix):]):
lines[i] = prefix + self.stash.store("<hr />", safe=True)
return lines
def _isLine(self, block):
"""Determines if a block should be replaced with an <HR>"""
if block.startswith(" "): return 0 # a code block
text = "".join([x for x in block if not x.isspace()])
if len(text) <= 2:
return 0
for pattern in ['isline1', 'isline2', 'isline3']:
m = RE.regExp[pattern].match(text)
if (m and m.group(1)):
return 1
else:
return 0
LINE_PREPROCESSOR = LinePreprocessor()
class ReferencePreprocessor(Preprocessor):
'''
Removes reference definitions from the text and stores them for later use.
'''
def run (self, lines): def run (self, lines):
@ -890,36 +916,47 @@ AUTOMAIL_PATTERN = AutomailPattern(AUTOMAIL_RE)
====================================================================== ======================================================================
Markdown also allows post-processors, which are similar to Markdown also allows post-processors, which are similar to
preprocessors in that they need to implement a "run" method. Unlike preprocessors in that they need to implement a "run" method. However,
pre-processors, they take a NanoDom document as a parameter and work they are run after core processing.
with that.
Post-Processor should extend markdown.Postprocessor. There are two types of post-processors: Postprocessor and TextPostprocessor
There are currently no standard post-processors, but the footnote
extension below uses one.
""" """
class Postprocessor: class Postprocessor:
'''
Postprocessors are run before the dom it converted back into text.
Each Postprocessor implements a "run" method that takes a pointer to a
NanoDom document, modifies it as necessary and returns a NanoDom
document.
Postprocessors must extend markdown.Postprocessor.
There are currently no standard post-processors, but the footnote
extension uses one.
'''
def run(self, dom):
pass pass
"""
======================================================================
======================== TEXT-POST-PROCESSORS ========================
======================================================================
Markdown also allows text-post-processors, which are similar to class TextPostprocessor:
textpreprocessors in that they need to implement a "run" method. '''
Unlike post-processors, they take a text string as a parameter and TextPostprocessors are run after the dom it converted back into text.
should return a string.
Text-Post-Processors should extend markdown.Postprocessor. Each TextPostprocessor implements a "run" method that takes a pointer to a
text string, modifies it as necessary and returns a text string.
""" TextPostprocessors must extend markdown.TextPostprocessor.
'''
def run(self, text):
pass
class RawHtmlTextPostprocessor(Postprocessor): class RawHtmlTextPostprocessor(TextPostprocessor):
def __init__(self): def __init__(self):
pass pass
@ -1204,7 +1241,6 @@ class Markdown:
configs_for_ext = configs[ext] configs_for_ext = configs[ext]
else: else:
configs_for_ext = [] configs_for_ext = []
extension = module.makeExtension(configs_for_ext) extension = module.makeExtension(configs_for_ext)
extension.extendMarkdown(self, globals()) extension.extendMarkdown(self, globals())
@ -1367,14 +1403,14 @@ class Markdown:
level = len(m.group(1)) level = len(m.group(1))
h = self.doc.createElement("h%d" % level) h = self.doc.createElement("h%d" % level)
parent_elem.appendChild(h) parent_elem.appendChild(h)
for item in self._handleInlineWrapper(m.group(2).strip()): for item in self._handleInline(m.group(2).strip()):
h.appendChild(item) h.appendChild(item)
else: else:
message(CRITICAL, "We've got a problem header!") message(CRITICAL, "We've got a problem header!")
def _processParagraph(self, parent_elem, paragraph, inList, looseList): def _processParagraph(self, parent_elem, paragraph, inList, looseList):
list = self._handleInlineWrapper("\n".join(paragraph)) list = self._handleInline("\n".join(paragraph))
if ( parent_elem.nodeName == 'li' if ( parent_elem.nodeName == 'li'
and not (looseList or parent_elem.childNodes)): and not (looseList or parent_elem.childNodes)):
@ -1563,7 +1599,17 @@ class Markdown:
def _handleInlineWrapper (self, line, patternIndex=0): def _handleInline (self, line, patternIndex=0):
"""Transform a Markdown line with inline elements to an XHTML
fragment.
This function uses auxiliary objects called inline patterns.
See notes on inline patterns above.
@param line: A line of Markdown text
@param patternIndex: The index of the inlinePattern to start with
@return: A list of NanoDom nodes """
parts = [line] parts = [line]
@ -1597,26 +1643,7 @@ class Markdown:
return parts return parts
def _handleInline(self, line): def _applyPattern(self, line, pattern, patternIndex):
"""Transform a Markdown line with inline elements to an XHTML
fragment.
This function uses auxiliary objects called inline patterns.
See notes on inline patterns above.
@param item: A block of Markdown text
@return: A list of NanoDom nodes """
if not(line):
return [self.doc.createTextNode(' ')]
for pattern in self.inlinePatterns:
list = self._applyPattern( line, pattern)
if list: return list
return [self.doc.createTextNode(line)]
def _applyPattern(self, line, pattern, patternIndex=0):
""" Given a pattern name, this function checks if the line """ Given a pattern name, this function checks if the line
fits the pattern, creates the necessary elements, and returns fits the pattern, creates the necessary elements, and returns
@ -1650,7 +1677,7 @@ class Markdown:
for child in node.childNodes: for child in node.childNodes:
if isinstance(child, TextNode): if isinstance(child, TextNode):
result = self._handleInlineWrapper(child.value, patternIndex+1) result = self._handleInline(child.value, patternIndex+1)
if result: if result:
@ -1879,12 +1906,28 @@ def parse_options():
'extensions': options.extensions, 'extensions': options.extensions,
'encoding': options.encoding } 'encoding': options.encoding }
def main(): def main():
options = parse_options() options = parse_options()
#if os.access(inFile, os.R_OK):
if not options: if not options:
return 0 sys.exit(0)
markdownFromFile(**options) markdownFromFile(**options)
if __name__ == '__main__': if __name__ == '__main__':
sys.exit(main())
""" Run Markdown from the command line. """ """ Run Markdown from the command line. """
sys.exit(main)

View File

@ -1,7 +1,4 @@
""" """
## To see this file as plain text go to
## http://freewisdom.org/projects/python-markdown/mdx_footnotes.raw_content
========================= FOOTNOTES ================================= ========================= FOOTNOTES =================================
This section adds footnote handling to markdown. It can be used as This section adds footnote handling to markdown. It can be used as
@ -119,7 +116,7 @@ class FootnoteExtension (markdown.Extension):
li = doc.createElement('li') li = doc.createElement('li')
li.setAttribute('id', self.makeFootnoteId(i)) li.setAttribute('id', self.makeFootnoteId(i))
self.md._processSection(li, self.footnotes[id].split("\n")) self.md._processSection(li, self.footnotes[id].split("\n"), looseList=1)
#li.appendChild(doc.createTextNode(self.footnotes[id])) #li.appendChild(doc.createTextNode(self.footnotes[id]))
@ -133,8 +130,13 @@ class FootnoteExtension (markdown.Extension):
if li.childNodes : if li.childNodes :
node = li.childNodes[-1] node = li.childNodes[-1]
if node.type == "text" : if node.type == "text" :
node = li li.appendChild(backlink)
elif node.nodeName == "p":
node.appendChild(backlink) node.appendChild(backlink)
else:
p = doc.createElement('p')
p.appendChild(backlink)
li.appendChild(p)
ol.appendChild(li) ol.appendChild(li)