mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Upgrade to python-markdown 1.7
This commit is contained in:
parent
862f75bf2c
commit
e54893d9a8
@ -1,8 +1,8 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
|
|
||||||
version = "1.7"
|
version = "1.7"
|
||||||
version_info = (1,7,0,"rc-1")
|
version_info = (1,7,0,"rc-2")
|
||||||
__revision__ = "$Rev: 66 $"
|
__revision__ = "$Rev: 72 $"
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Python-Markdown
|
Python-Markdown
|
||||||
@ -31,7 +31,7 @@ License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
import re, sys, os, random, codecs
|
import re, sys, codecs
|
||||||
|
|
||||||
from logging import getLogger, StreamHandler, Formatter, \
|
from logging import getLogger, StreamHandler, Formatter, \
|
||||||
DEBUG, INFO, WARN, ERROR, CRITICAL
|
DEBUG, INFO, WARN, ERROR, CRITICAL
|
||||||
@ -108,7 +108,7 @@ BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table',
|
|||||||
'form', 'fieldset', 'iframe', 'math', 'ins',
|
'form', 'fieldset', 'iframe', 'math', 'ins',
|
||||||
'del', 'hr', 'hr/', 'style']
|
'del', 'hr', 'hr/', 'style']
|
||||||
|
|
||||||
def is_block_level (tag):
|
def isBlockLevel (tag):
|
||||||
return ( (tag in BLOCK_LEVEL_ELEMENTS) or
|
return ( (tag in BLOCK_LEVEL_ELEMENTS) or
|
||||||
(tag[0] == 'h' and tag[1] in "0123456789") )
|
(tag[0] == 'h' and tag[1] in "0123456789") )
|
||||||
|
|
||||||
@ -127,8 +127,7 @@ want. It also adds extra white space when converting DOM to string
|
|||||||
|
|
||||||
ENTITY_NORMALIZATION_EXPRESSIONS = [ (re.compile("&"), "&amp;"),
|
ENTITY_NORMALIZATION_EXPRESSIONS = [ (re.compile("&"), "&amp;"),
|
||||||
(re.compile("<"), "&lt;"),
|
(re.compile("<"), "&lt;"),
|
||||||
(re.compile(">"), "&gt;"),
|
(re.compile(">"), "&gt;")]
|
||||||
(re.compile("\""), "&quot;")]
|
|
||||||
|
|
||||||
ENTITY_NORMALIZATION_EXPRESSIONS_SOFT = [ (re.compile("&(?!\#)"), "&amp;"),
|
ENTITY_NORMALIZATION_EXPRESSIONS_SOFT = [ (re.compile("&(?!\#)"), "&amp;"),
|
||||||
(re.compile("<"), "&lt;"),
|
(re.compile("<"), "&lt;"),
|
||||||
@ -406,85 +405,43 @@ class EntityReference:
|
|||||||
Preprocessors munge source text before we start doing anything too
|
Preprocessors munge source text before we start doing anything too
|
||||||
complicated.
|
complicated.
|
||||||
|
|
||||||
Each preprocessor implements a "run" method that takes a pointer to a
|
There are two types of preprocessors: TextPreprocessor and Preprocessor.
|
||||||
list of lines of the document, modifies it as necessary and returns
|
|
||||||
either the same pointer or a pointer to a new list. Preprocessors
|
|
||||||
must extend markdown.Preprocessor.
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class Preprocessor:
|
class TextPreprocessor:
|
||||||
|
'''
|
||||||
|
TextPreprocessors are run before the text is broken into lines.
|
||||||
|
|
||||||
|
Each TextPreprocessor implements a "run" method that takes a pointer to a
|
||||||
|
text string of the document, modifies it as necessary and returns
|
||||||
|
either the same pointer or a pointer to a new string.
|
||||||
|
|
||||||
|
TextPreprocessors must extend markdown.TextPreprocessor.
|
||||||
|
'''
|
||||||
|
|
||||||
|
def run(self, text):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
class HeaderPreprocessor (Preprocessor):
|
class Preprocessor:
|
||||||
|
'''
|
||||||
|
Preprocessors are run after the text is broken into lines.
|
||||||
|
|
||||||
"""
|
Each preprocessor implements a "run" method that takes a pointer to a
|
||||||
Replaces underlined headers with hashed headers to avoid
|
list of lines of the document, modifies it as necessary and returns
|
||||||
the need for lookahead later.
|
either the same pointer or a pointer to a new list.
|
||||||
"""
|
|
||||||
|
|
||||||
def run (self, lines):
|
Preprocessors must extend markdown.Preprocessor.
|
||||||
|
'''
|
||||||
|
|
||||||
i = -1
|
def run(self, lines):
|
||||||
while i+1 < len(lines):
|
pass
|
||||||
i = i+1
|
|
||||||
if not lines[i].strip():
|
|
||||||
continue
|
|
||||||
|
|
||||||
if lines[i].startswith("#"):
|
|
||||||
lines.insert(i+1, "\n")
|
|
||||||
|
|
||||||
if (i+1 <= len(lines)
|
|
||||||
and lines[i+1]
|
|
||||||
and lines[i+1][0] in ['-', '=']):
|
|
||||||
|
|
||||||
underline = lines[i+1].strip()
|
|
||||||
|
|
||||||
if underline == "="*len(underline):
|
|
||||||
lines[i] = "# " + lines[i].strip()
|
|
||||||
lines[i+1] = ""
|
|
||||||
elif underline == "-"*len(underline):
|
|
||||||
lines[i] = "## " + lines[i].strip()
|
|
||||||
lines[i+1] = ""
|
|
||||||
|
|
||||||
return lines
|
|
||||||
|
|
||||||
HEADER_PREPROCESSOR = HeaderPreprocessor()
|
|
||||||
|
|
||||||
class LinePreprocessor (Preprocessor):
|
|
||||||
"""Deals with HR lines (needs to be done before processing lists)"""
|
|
||||||
|
|
||||||
blockquote_re = re.compile(r'^(> )+')
|
|
||||||
|
|
||||||
def run (self, lines):
|
|
||||||
for i in range(len(lines)):
|
|
||||||
prefix = ''
|
|
||||||
m = self.blockquote_re.search(lines[i])
|
|
||||||
if m : prefix = m.group(0)
|
|
||||||
if self._isLine(lines[i][len(prefix):]):
|
|
||||||
lines[i] = prefix + self.stash.store("<hr />", safe=True)
|
|
||||||
return lines
|
|
||||||
|
|
||||||
def _isLine(self, block):
|
|
||||||
"""Determines if a block should be replaced with an <:wHR>"""
|
|
||||||
if block.startswith(" "): return 0 # a code block
|
|
||||||
text = "".join([x for x in block if not x.isspace()])
|
|
||||||
if len(text) <= 2:
|
|
||||||
return 0
|
|
||||||
for pattern in ['isline1', 'isline2', 'isline3']:
|
|
||||||
m = RE.regExp[pattern].match(text)
|
|
||||||
if (m and m.group(1)):
|
|
||||||
return 1
|
|
||||||
else:
|
|
||||||
return 0
|
|
||||||
|
|
||||||
LINE_PREPROCESSOR = LinePreprocessor()
|
|
||||||
|
|
||||||
|
|
||||||
class HtmlBlockPreprocessor (Preprocessor):
|
class HtmlBlockPreprocessor(TextPreprocessor):
|
||||||
"""Removes html blocks from self.lines"""
|
"""Removes html blocks from the source text and stores it."""
|
||||||
|
|
||||||
def _get_left_tag(self, block):
|
def _get_left_tag(self, block):
|
||||||
return block[1:].replace(">", " ", 1).split()[0].lower()
|
return block[1:].replace(">", " ", 1).split()[0].lower()
|
||||||
@ -495,7 +452,7 @@ class HtmlBlockPreprocessor (Preprocessor):
|
|||||||
|
|
||||||
def _equal_tags(self, left_tag, right_tag):
|
def _equal_tags(self, left_tag, right_tag):
|
||||||
|
|
||||||
if left_tag in ['?', '?php', 'div']: # handle PHP, etc.
|
if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
|
||||||
return True
|
return True
|
||||||
if ("/" + left_tag) == right_tag:
|
if ("/" + left_tag) == right_tag:
|
||||||
return True
|
return True
|
||||||
@ -511,7 +468,7 @@ class HtmlBlockPreprocessor (Preprocessor):
|
|||||||
return (tag in ['hr', 'hr/'])
|
return (tag in ['hr', 'hr/'])
|
||||||
|
|
||||||
|
|
||||||
def run (self, text):
|
def run(self, text):
|
||||||
|
|
||||||
new_blocks = []
|
new_blocks = []
|
||||||
text = text.split("\n\n")
|
text = text.split("\n\n")
|
||||||
@ -532,7 +489,7 @@ class HtmlBlockPreprocessor (Preprocessor):
|
|||||||
left_tag = self._get_left_tag(block)
|
left_tag = self._get_left_tag(block)
|
||||||
right_tag = self._get_right_tag(left_tag, block)
|
right_tag = self._get_right_tag(left_tag, block)
|
||||||
|
|
||||||
if not (is_block_level(left_tag) \
|
if not (isBlockLevel(left_tag) \
|
||||||
or block[1] in ["!", "?", "@", "%"]):
|
or block[1] in ["!", "?", "@", "%"]):
|
||||||
new_blocks.append(block)
|
new_blocks.append(block)
|
||||||
continue
|
continue
|
||||||
@ -581,7 +538,76 @@ class HtmlBlockPreprocessor (Preprocessor):
|
|||||||
HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
|
HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
|
||||||
|
|
||||||
|
|
||||||
class ReferencePreprocessor (Preprocessor):
|
class HeaderPreprocessor(Preprocessor):
|
||||||
|
|
||||||
|
"""
|
||||||
|
Replaces underlined headers with hashed headers to avoid
|
||||||
|
the need for lookahead later.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def run (self, lines):
|
||||||
|
|
||||||
|
i = -1
|
||||||
|
while i+1 < len(lines):
|
||||||
|
i = i+1
|
||||||
|
if not lines[i].strip():
|
||||||
|
continue
|
||||||
|
|
||||||
|
if lines[i].startswith("#"):
|
||||||
|
lines.insert(i+1, "\n")
|
||||||
|
|
||||||
|
if (i+1 <= len(lines)
|
||||||
|
and lines[i+1]
|
||||||
|
and lines[i+1][0] in ['-', '=']):
|
||||||
|
|
||||||
|
underline = lines[i+1].strip()
|
||||||
|
|
||||||
|
if underline == "="*len(underline):
|
||||||
|
lines[i] = "# " + lines[i].strip()
|
||||||
|
lines[i+1] = ""
|
||||||
|
elif underline == "-"*len(underline):
|
||||||
|
lines[i] = "## " + lines[i].strip()
|
||||||
|
lines[i+1] = ""
|
||||||
|
|
||||||
|
return lines
|
||||||
|
|
||||||
|
HEADER_PREPROCESSOR = HeaderPreprocessor()
|
||||||
|
|
||||||
|
|
||||||
|
class LinePreprocessor(Preprocessor):
|
||||||
|
"""Deals with HR lines (needs to be done before processing lists)"""
|
||||||
|
|
||||||
|
blockquote_re = re.compile(r'^(> )+')
|
||||||
|
|
||||||
|
def run (self, lines):
|
||||||
|
for i in range(len(lines)):
|
||||||
|
prefix = ''
|
||||||
|
m = self.blockquote_re.search(lines[i])
|
||||||
|
if m : prefix = m.group(0)
|
||||||
|
if self._isLine(lines[i][len(prefix):]):
|
||||||
|
lines[i] = prefix + self.stash.store("<hr />", safe=True)
|
||||||
|
return lines
|
||||||
|
|
||||||
|
def _isLine(self, block):
|
||||||
|
"""Determines if a block should be replaced with an <HR>"""
|
||||||
|
if block.startswith(" "): return 0 # a code block
|
||||||
|
text = "".join([x for x in block if not x.isspace()])
|
||||||
|
if len(text) <= 2:
|
||||||
|
return 0
|
||||||
|
for pattern in ['isline1', 'isline2', 'isline3']:
|
||||||
|
m = RE.regExp[pattern].match(text)
|
||||||
|
if (m and m.group(1)):
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
return 0
|
||||||
|
|
||||||
|
LINE_PREPROCESSOR = LinePreprocessor()
|
||||||
|
|
||||||
|
|
||||||
|
class ReferencePreprocessor(Preprocessor):
|
||||||
|
'''
|
||||||
|
Removes reference definitions from the text and stores them for later use.
|
||||||
|
'''
|
||||||
|
|
||||||
def run (self, lines):
|
def run (self, lines):
|
||||||
|
|
||||||
@ -890,36 +916,47 @@ AUTOMAIL_PATTERN = AutomailPattern(AUTOMAIL_RE)
|
|||||||
======================================================================
|
======================================================================
|
||||||
|
|
||||||
Markdown also allows post-processors, which are similar to
|
Markdown also allows post-processors, which are similar to
|
||||||
preprocessors in that they need to implement a "run" method. Unlike
|
preprocessors in that they need to implement a "run" method. However,
|
||||||
pre-processors, they take a NanoDom document as a parameter and work
|
they are run after core processing.
|
||||||
with that.
|
|
||||||
|
|
||||||
Post-Processor should extend markdown.Postprocessor.
|
There are two types of post-processors: Postprocessor and TextPostprocessor
|
||||||
|
|
||||||
There are currently no standard post-processors, but the footnote
|
|
||||||
extension below uses one.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
class Postprocessor:
|
class Postprocessor:
|
||||||
|
'''
|
||||||
|
Postprocessors are run before the dom is converted back into text.
|
||||||
|
|
||||||
|
Each Postprocessor implements a "run" method that takes a pointer to a
|
||||||
|
NanoDom document, modifies it as necessary and returns a NanoDom
|
||||||
|
document.
|
||||||
|
|
||||||
|
Postprocessors must extend markdown.Postprocessor.
|
||||||
|
|
||||||
|
There are currently no standard post-processors, but the footnote
|
||||||
|
extension uses one.
|
||||||
|
'''
|
||||||
|
|
||||||
|
def run(self, dom):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
======================================================================
|
|
||||||
======================== TEXT-POST-PROCESSORS ========================
|
|
||||||
======================================================================
|
|
||||||
|
|
||||||
Markdown also allows text-post-processors, which are similar to
|
class TextPostprocessor:
|
||||||
textpreprocessors in that they need to implement a "run" method.
|
'''
|
||||||
Unlike post-processors, they take a text string as a parameter and
|
TextPostprocessors are run after the dom is converted back into text.
|
||||||
should return a string.
|
|
||||||
|
|
||||||
Text-Post-Processors should extend markdown.Postprocessor.
|
Each TextPostprocessor implements a "run" method that takes a pointer to a
|
||||||
|
text string, modifies it as necessary and returns a text string.
|
||||||
|
|
||||||
"""
|
TextPostprocessors must extend markdown.TextPostprocessor.
|
||||||
|
'''
|
||||||
|
|
||||||
|
def run(self, text):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class RawHtmlTextPostprocessor(Postprocessor):
|
class RawHtmlTextPostprocessor(TextPostprocessor):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
pass
|
pass
|
||||||
@ -1204,7 +1241,6 @@ class Markdown:
|
|||||||
configs_for_ext = configs[ext]
|
configs_for_ext = configs[ext]
|
||||||
else:
|
else:
|
||||||
configs_for_ext = []
|
configs_for_ext = []
|
||||||
|
|
||||||
extension = module.makeExtension(configs_for_ext)
|
extension = module.makeExtension(configs_for_ext)
|
||||||
extension.extendMarkdown(self, globals())
|
extension.extendMarkdown(self, globals())
|
||||||
|
|
||||||
@ -1367,14 +1403,14 @@ class Markdown:
|
|||||||
level = len(m.group(1))
|
level = len(m.group(1))
|
||||||
h = self.doc.createElement("h%d" % level)
|
h = self.doc.createElement("h%d" % level)
|
||||||
parent_elem.appendChild(h)
|
parent_elem.appendChild(h)
|
||||||
for item in self._handleInlineWrapper(m.group(2).strip()):
|
for item in self._handleInline(m.group(2).strip()):
|
||||||
h.appendChild(item)
|
h.appendChild(item)
|
||||||
else:
|
else:
|
||||||
message(CRITICAL, "We've got a problem header!")
|
message(CRITICAL, "We've got a problem header!")
|
||||||
|
|
||||||
|
|
||||||
def _processParagraph(self, parent_elem, paragraph, inList, looseList):
|
def _processParagraph(self, parent_elem, paragraph, inList, looseList):
|
||||||
list = self._handleInlineWrapper("\n".join(paragraph))
|
list = self._handleInline("\n".join(paragraph))
|
||||||
|
|
||||||
if ( parent_elem.nodeName == 'li'
|
if ( parent_elem.nodeName == 'li'
|
||||||
and not (looseList or parent_elem.childNodes)):
|
and not (looseList or parent_elem.childNodes)):
|
||||||
@ -1563,7 +1599,17 @@ class Markdown:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _handleInlineWrapper (self, line, patternIndex=0):
|
def _handleInline (self, line, patternIndex=0):
|
||||||
|
"""Transform a Markdown line with inline elements to an XHTML
|
||||||
|
fragment.
|
||||||
|
|
||||||
|
This function uses auxiliary objects called inline patterns.
|
||||||
|
See notes on inline patterns above.
|
||||||
|
|
||||||
|
@param line: A line of Markdown text
|
||||||
|
@param patternIndex: The index of the inlinePattern to start with
|
||||||
|
@return: A list of NanoDom nodes """
|
||||||
|
|
||||||
|
|
||||||
parts = [line]
|
parts = [line]
|
||||||
|
|
||||||
@ -1597,26 +1643,7 @@ class Markdown:
|
|||||||
return parts
|
return parts
|
||||||
|
|
||||||
|
|
||||||
def _handleInline(self, line):
|
def _applyPattern(self, line, pattern, patternIndex):
|
||||||
"""Transform a Markdown line with inline elements to an XHTML
|
|
||||||
fragment.
|
|
||||||
|
|
||||||
This function uses auxiliary objects called inline patterns.
|
|
||||||
See notes on inline patterns above.
|
|
||||||
|
|
||||||
@param item: A block of Markdown text
|
|
||||||
@return: A list of NanoDom nodes """
|
|
||||||
|
|
||||||
if not(line):
|
|
||||||
return [self.doc.createTextNode(' ')]
|
|
||||||
|
|
||||||
for pattern in self.inlinePatterns:
|
|
||||||
list = self._applyPattern( line, pattern)
|
|
||||||
if list: return list
|
|
||||||
|
|
||||||
return [self.doc.createTextNode(line)]
|
|
||||||
|
|
||||||
def _applyPattern(self, line, pattern, patternIndex=0):
|
|
||||||
|
|
||||||
""" Given a pattern name, this function checks if the line
|
""" Given a pattern name, this function checks if the line
|
||||||
fits the pattern, creates the necessary elements, and returns
|
fits the pattern, creates the necessary elements, and returns
|
||||||
@ -1650,7 +1677,7 @@ class Markdown:
|
|||||||
for child in node.childNodes:
|
for child in node.childNodes:
|
||||||
if isinstance(child, TextNode):
|
if isinstance(child, TextNode):
|
||||||
|
|
||||||
result = self._handleInlineWrapper(child.value, patternIndex+1)
|
result = self._handleInline(child.value, patternIndex+1)
|
||||||
|
|
||||||
if result:
|
if result:
|
||||||
|
|
||||||
@ -1879,12 +1906,28 @@ def parse_options():
|
|||||||
'extensions': options.extensions,
|
'extensions': options.extensions,
|
||||||
'encoding': options.encoding }
|
'encoding': options.encoding }
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
options = parse_options()
|
options = parse_options()
|
||||||
|
|
||||||
|
#if os.access(inFile, os.R_OK):
|
||||||
|
|
||||||
if not options:
|
if not options:
|
||||||
return 0
|
sys.exit(0)
|
||||||
|
|
||||||
markdownFromFile(**options)
|
markdownFromFile(**options)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
sys.exit(main())
|
||||||
""" Run Markdown from the command line. """
|
""" Run Markdown from the command line. """
|
||||||
sys.exit(main)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1,7 +1,4 @@
|
|||||||
"""
|
"""
|
||||||
## To see this file as plain text go to
|
|
||||||
## http://freewisdom.org/projects/python-markdown/mdx_footnotes.raw_content
|
|
||||||
|
|
||||||
========================= FOOTNOTES =================================
|
========================= FOOTNOTES =================================
|
||||||
|
|
||||||
This section adds footnote handling to markdown. It can be used as
|
This section adds footnote handling to markdown. It can be used as
|
||||||
@ -119,7 +116,7 @@ class FootnoteExtension (markdown.Extension):
|
|||||||
li = doc.createElement('li')
|
li = doc.createElement('li')
|
||||||
li.setAttribute('id', self.makeFootnoteId(i))
|
li.setAttribute('id', self.makeFootnoteId(i))
|
||||||
|
|
||||||
self.md._processSection(li, self.footnotes[id].split("\n"))
|
self.md._processSection(li, self.footnotes[id].split("\n"), looseList=1)
|
||||||
|
|
||||||
#li.appendChild(doc.createTextNode(self.footnotes[id]))
|
#li.appendChild(doc.createTextNode(self.footnotes[id]))
|
||||||
|
|
||||||
@ -133,8 +130,13 @@ class FootnoteExtension (markdown.Extension):
|
|||||||
if li.childNodes :
|
if li.childNodes :
|
||||||
node = li.childNodes[-1]
|
node = li.childNodes[-1]
|
||||||
if node.type == "text" :
|
if node.type == "text" :
|
||||||
node = li
|
li.appendChild(backlink)
|
||||||
|
elif node.nodeName == "p":
|
||||||
node.appendChild(backlink)
|
node.appendChild(backlink)
|
||||||
|
else:
|
||||||
|
p = doc.createElement('p')
|
||||||
|
p.appendChild(backlink)
|
||||||
|
li.appendChild(p)
|
||||||
|
|
||||||
ol.appendChild(li)
|
ol.appendChild(li)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user