mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #392
This commit is contained in:
parent
42a90b9515
commit
f571a7b603
@ -58,11 +58,11 @@ def generate_html(txtfile, encoding, logger):
|
||||
txt = codecs.open(txtfile, 'rb', enc).read()
|
||||
|
||||
logger.info('Converting text to HTML...')
|
||||
md = markdown.Markdown(txt,
|
||||
md = markdown.Markdown(
|
||||
extensions=['footnotes', 'tables', 'toc'],
|
||||
safe_mode=False,
|
||||
)
|
||||
html = md.toString()
|
||||
html = md.convert(txt)
|
||||
p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile))
|
||||
p.close()
|
||||
codecs.open(p.name, 'wb', 'utf8').write(html)
|
||||
|
@ -1,8 +1,8 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
version = "1.6b"
|
||||
version_info = (1,6,2,"rc-2")
|
||||
__revision__ = "$Rev$"
|
||||
version = "1.7"
|
||||
version_info = (1,7,0,"rc-1")
|
||||
__revision__ = "$Rev: 66 $"
|
||||
|
||||
"""
|
||||
Python-Markdown
|
||||
@ -12,7 +12,7 @@ Converts Markdown to HTML. Basic usage as a module:
|
||||
|
||||
import markdown
|
||||
md = Markdown()
|
||||
html = markdown.convert(your_text_string)
|
||||
html = md.convert(your_text_string)
|
||||
|
||||
See http://www.freewisdom.org/projects/python-markdown/ for more
|
||||
information and instructions on how to extend the functionality of the
|
||||
@ -20,25 +20,39 @@ script. (You might want to read that before you try modifying this
|
||||
file.)
|
||||
|
||||
Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and
|
||||
maintained by [Yuri Takhteyev](http://www.freewisdom.org).
|
||||
maintained by [Yuri Takhteyev](http://www.freewisdom.org) and [Waylan
|
||||
Limberg](http://achinghead.com/).
|
||||
|
||||
Contact: yuri [at] freewisdom.org
|
||||
waylan [at] gmail.com
|
||||
|
||||
License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
|
||||
|
||||
"""
|
||||
|
||||
|
||||
import re, sys, codecs
|
||||
import re, sys, os, random, codecs
|
||||
|
||||
from logging import getLogger, StreamHandler, Formatter, \
|
||||
DEBUG, INFO, WARN, ERROR, CRITICAL
|
||||
|
||||
# Set debug level: 3 none, 2 critical, 1 informative, 0 all
|
||||
(VERBOSE, INFO, CRITICAL, NONE) = range(4)
|
||||
|
||||
MESSAGE_THRESHOLD = CRITICAL
|
||||
|
||||
|
||||
# Configure debug message logger (the hard way - to support python 2.3)
|
||||
logger = getLogger('MARKDOWN')
|
||||
logger.setLevel(DEBUG) # This is restricted by handlers later
|
||||
console_hndlr = StreamHandler()
|
||||
formatter = Formatter('%(name)s-%(levelname)s: "%(message)s"')
|
||||
console_hndlr.setFormatter(formatter)
|
||||
console_hndlr.setLevel(MESSAGE_THRESHOLD)
|
||||
logger.addHandler(console_hndlr)
|
||||
|
||||
|
||||
def message(level, text):
|
||||
if level >= MESSAGE_THRESHOLD :
|
||||
print text
|
||||
''' A wrapper method for logging debug messages. '''
|
||||
logger.log(level, text)
|
||||
|
||||
|
||||
# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------
|
||||
@ -62,15 +76,15 @@ RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
|
||||
# 0780-07BF - Thaana
|
||||
# 07C0-07FF - Nko
|
||||
|
||||
BOMS = { 'utf-8' : (unicode(codecs.BOM_UTF8, "utf-8"), ),
|
||||
'utf-16' : (unicode(codecs.BOM_UTF16_LE, "utf-16"),
|
||||
unicode(codecs.BOM_UTF16_BE, "utf-16")),
|
||||
#'utf-32' : (unicode(codecs.BOM_UTF32_LE, "utf-32"),
|
||||
# unicode(codecs.BOM_UTF32_BE, "utf-32")),
|
||||
BOMS = { 'utf-8': (codecs.BOM_UTF8, ),
|
||||
'utf-16': (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE),
|
||||
#'utf-32': (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)
|
||||
}
|
||||
|
||||
def removeBOM(text, encoding):
    """Return `text` with one leading byte-order mark removed, if present.

    @param text: a byte string or a unicode string.
    @param encoding: a key of BOMS; any other value raises KeyError.
    @returns: `text` without its leading BOM, or `text` unchanged when it
        does not start with one.
    """
    boms = BOMS[encoding]  # looked up first so unknown encodings still raise
    if isinstance(text, unicode):
        # Once decoded, a BOM is always the single character U+FEFF.
        # Decoding the raw BOM bytes is not reliable: the 'utf-16' codec
        # consumes the mark and yields u'', and the previous
        # `cond and x or y` idiom then fell back to the undecoded bytes.
        boms = (u'\ufeff',)
    for bom in boms:
        if text.startswith(bom):
            # Slice the prefix off.  lstrip() treats its argument as a set
            # of characters, so it could also strip genuine content that
            # happens to begin with one of the BOM's bytes.
            return text[len(bom):]
    return text
|
||||
@ -229,6 +243,7 @@ class Element :
|
||||
|
||||
if bidi:
|
||||
|
||||
orig_bidi = self.bidi
|
||||
|
||||
if not self.bidi or self.isDocumentElement:
|
||||
# Once the bidi is set don't change it (except for doc element)
|
||||
@ -331,7 +346,7 @@ class Element :
|
||||
|
||||
buffer += childBuffer
|
||||
|
||||
if self.nodeName in ['p', 'li', 'ul', 'ol',
|
||||
if self.nodeName in ['p', 'br ', 'li', 'ul', 'ol',
|
||||
'h1', 'h2', 'h3', 'h4'] :
|
||||
buffer += "\n"
|
||||
|
||||
@ -441,14 +456,19 @@ HEADER_PREPROCESSOR = HeaderPreprocessor()
|
||||
class LinePreprocessor (Preprocessor):
|
||||
"""Deals with HR lines (needs to be done before processing lists)"""
|
||||
|
||||
blockquote_re = re.compile(r'^(> )+')
|
||||
|
||||
def run (self, lines):
|
||||
for i in range(len(lines)):
|
||||
if self._isLine(lines[i]) :
|
||||
lines[i] = "<hr />"
|
||||
prefix = ''
|
||||
m = self.blockquote_re.search(lines[i])
|
||||
if m : prefix = m.group(0)
|
||||
if self._isLine(lines[i][len(prefix):]):
|
||||
lines[i] = prefix + self.stash.store("<hr />", safe=True)
|
||||
return lines
|
||||
|
||||
def _isLine(self, block):
|
||||
"""Determines if a block should be replaced with an <HR>"""
|
||||
"""Determines if a block should be replaced with an <HR>"""
|
||||
if block.startswith(" "): return 0 # a code block
|
||||
text = "".join([x for x in block if not x.isspace()])
|
||||
if len(text) <= 2:
|
||||
@ -463,19 +483,6 @@ class LinePreprocessor (Preprocessor):
|
||||
LINE_PREPROCESSOR = LinePreprocessor()
|
||||
|
||||
|
||||
class LineBreaksPreprocessor (Preprocessor):
|
||||
"""Replaces double spaces at the end of the lines with <br/ >."""
|
||||
|
||||
def run (self, lines) :
|
||||
for i in range(len(lines)) :
|
||||
if (lines[i].endswith(" ")
|
||||
and not RE.regExp['tabbed'].match(lines[i]) ):
|
||||
lines[i] += "<br />"
|
||||
return lines
|
||||
|
||||
LINE_BREAKS_PREPROCESSOR = LineBreaksPreprocessor()
|
||||
|
||||
|
||||
class HtmlBlockPreprocessor (Preprocessor):
|
||||
"""Removes html blocks from self.lines"""
|
||||
|
||||
@ -507,7 +514,6 @@ class HtmlBlockPreprocessor (Preprocessor):
|
||||
def run (self, text):
|
||||
|
||||
new_blocks = []
|
||||
#text = "\n".join(lines)
|
||||
text = text.split("\n\n")
|
||||
|
||||
items = []
|
||||
@ -570,7 +576,7 @@ class HtmlBlockPreprocessor (Preprocessor):
|
||||
new_blocks.append(self.stash.store('\n\n'.join(items)))
|
||||
new_blocks.append('\n')
|
||||
|
||||
return "\n\n".join(new_blocks) #.split("\n")
|
||||
return "\n\n".join(new_blocks)
|
||||
|
||||
HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
|
||||
|
||||
@ -648,9 +654,10 @@ So, we apply the expressions in the following order:
|
||||
|
||||
NOBRACKET = r'[^\]\[]*'
|
||||
BRK = ( r'\[('
|
||||
+ (NOBRACKET + r'(\['+NOBRACKET)*6
|
||||
+ (NOBRACKET+ r'\])*'+NOBRACKET)*6
|
||||
+ (NOBRACKET + r'(\[')*6
|
||||
+ (NOBRACKET+ r'\])*')*6
|
||||
+ NOBRACKET + r')\]' )
|
||||
NOIMG = r'(?<!\!)'
|
||||
|
||||
BACKTICK_RE = r'\`([^\`]*)\`' # `e= m*c^2`
|
||||
DOUBLE_BACKTICK_RE = r'\`\`(.*)\`\`' # ``e=f("`")``
|
||||
@ -667,10 +674,10 @@ else :
|
||||
STRONG_2_RE = r'__([^_]*)__' # __strong__
|
||||
STRONG_EM_2_RE = r'___([^_]*)___' # ___strong___
|
||||
|
||||
LINK_RE = BRK + r'\s*\(([^\)]*)\)' # [text](url)
|
||||
LINK_ANGLED_RE = BRK + r'\s*\(<([^\)]*)>\)' # [text](<url>)
|
||||
LINK_RE = NOIMG + BRK + r'\s*\(([^\)]*)\)' # [text](url)
|
||||
LINK_ANGLED_RE = NOIMG + BRK + r'\s*\(<([^\)]*)>\)' # [text](<url>)
|
||||
IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(([^\)]*)\)' # ![alttxt](http://x.com/)
|
||||
REFERENCE_RE = BRK+ r'\s*\[([^\]]*)\]' # [Google][3]
|
||||
REFERENCE_RE = NOIMG + BRK+ r'\s*\[([^\]]*)\]' # [Google][3]
|
||||
IMAGE_REFERENCE_RE = r'\!' + BRK + '\s*\[([^\]]*)\]' # ![alt text][2]
|
||||
NOT_STRONG_RE = r'( \* )' # stand-alone * or _
|
||||
AUTOLINK_RE = r'<(http://[^>]*)>' # <http://www.123.com>
|
||||
@ -678,6 +685,8 @@ AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' # <me@example.com>
|
||||
#HTML_RE = r'(\<[^\>]*\>)' # <...>
|
||||
HTML_RE = r'(\<[a-zA-Z/][^\>]*\>)' # <...>
|
||||
ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # &amp;
|
||||
LINE_BREAK_RE = r' \n' # two spaces at end of line
|
||||
LINE_BREAK_2_RE = r' $' # two spaces at end of text
|
||||
|
||||
class Pattern:
|
||||
|
||||
@ -706,6 +715,11 @@ class SimpleTagPattern (Pattern):
|
||||
el.appendChild(doc.createTextNode(m.group(2)))
|
||||
return el
|
||||
|
||||
class SubstituteTagPattern (SimpleTagPattern):
    """Inline pattern whose match is replaced by a bare `self.tag` element."""

    def handleMatch (self, m, doc):
        """Build the replacement node for a match.

        @param m: the regular-expression match object (its text is not used).
        @param doc: the document used to create the new element.
        @returns: a new element named `self.tag` with no children added.
        """
        element = doc.createElement(self.tag)
        return element
|
||||
|
||||
class BacktickPattern (Pattern):
|
||||
|
||||
def __init__ (self, pattern):
|
||||
@ -734,7 +748,9 @@ class DoubleTagPattern (SimpleTagPattern) :
|
||||
class HtmlPattern (Pattern):
|
||||
|
||||
def handleMatch (self, m, doc):
|
||||
place_holder = self.stash.store(m.group(2))
|
||||
rawhtml = m.group(2)
|
||||
inline = True
|
||||
place_holder = self.stash.store(rawhtml)
|
||||
return doc.createTextNode(place_holder)
|
||||
|
||||
|
||||
@ -762,7 +778,10 @@ class ImagePattern (Pattern):
|
||||
def handleMatch(self, m, doc):
|
||||
el = doc.createElement('img')
|
||||
src_parts = m.group(9).split()
|
||||
if src_parts:
|
||||
el.setAttribute('src', src_parts[0])
|
||||
else:
|
||||
el.setAttribute('src', "")
|
||||
if len(src_parts) > 1:
|
||||
el.setAttribute('title', dequote(" ".join(src_parts[1:])))
|
||||
if ENABLE_ATTRIBUTES:
|
||||
@ -849,6 +868,9 @@ EMPHASIS_PATTERN_2 = SimpleTagPattern(EMPHASIS_2_RE, 'em')
|
||||
STRONG_EM_PATTERN = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
|
||||
STRONG_EM_PATTERN_2 = DoubleTagPattern(STRONG_EM_2_RE, 'strong,em')
|
||||
|
||||
LINE_BREAK_PATTERN = SubstituteTagPattern(LINE_BREAK_RE, 'br ')
|
||||
LINE_BREAK_PATTERN_2 = SubstituteTagPattern(LINE_BREAK_2_RE, 'br ')
|
||||
|
||||
LINK_PATTERN = LinkPattern(LINK_RE)
|
||||
LINK_ANGLED_PATTERN = LinkPattern(LINK_ANGLED_RE)
|
||||
IMAGE_LINK_PATTERN = ImagePattern(IMAGE_LINK_RE)
|
||||
@ -882,6 +904,51 @@ class Postprocessor :
|
||||
pass
|
||||
|
||||
|
||||
"""
|
||||
======================================================================
|
||||
======================== TEXT-POST-PROCESSORS ========================
|
||||
======================================================================
|
||||
|
||||
Markdown also allows text-post-processors, which are similar to
|
||||
textpreprocessors in that they need to implement a "run" method.
|
||||
Unlike post-processors, they take a text string as a parameter and
|
||||
should return a string.
|
||||
|
||||
Text-Post-Processors should extend markdown.Postprocessor.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class RawHtmlTextPostprocessor(Postprocessor):
    """Text-post-processor that puts stashed raw HTML back into the output.

    `stash` and `safeMode` are assigned on the instance from outside
    before `run` is called.
    """

    def __init__(self):
        pass

    def run(self, text):
        """Replace every stash placeholder in `text` with its HTML segment.

        When `self.safeMode` is set, segments not stored as safe are
        escaped ('escape'), dropped ('remove') or, for any other truthy
        value, replaced with HTML_REMOVED_TEXT.

        @param text: the serialized document containing placeholders.
        @returns: the text with all placeholders substituted.
        """
        # Normalise the mode once instead of on every iteration.
        mode = str(self.safeMode).lower()
        for i in range(self.stash.html_counter):
            html, safe = self.stash.rawHtmlBlocks[i]
            if self.safeMode and not safe:
                if mode == 'escape':
                    html = self.escape(html)
                elif mode == 'remove':
                    html = ''
                else:
                    html = HTML_REMOVED_TEXT

            placeholder = HTML_PLACEHOLDER % i
            # A placeholder alone in a paragraph is a block-level segment:
            # drop the wrapping <p> as well.
            text = text.replace("<p>%s\n</p>" % placeholder, html + "\n")
            text = text.replace(placeholder, html)
        return text

    def escape(self, html):
        ''' Basic html escaping '''
        # '&' must be handled first so the entities added below are not
        # escaped a second time.
        html = html.replace('&', '&amp;')
        html = html.replace('<', '&lt;')
        html = html.replace('>', '&gt;')
        return html.replace('"', '&quot;')
|
||||
|
||||
RAWHTMLTEXTPOSTPROCESSOR = RawHtmlTextPostprocessor()
|
||||
|
||||
"""
|
||||
======================================================================
|
||||
========================== MISC AUXILIARY CLASSES ====================
|
||||
@ -896,14 +963,16 @@ class HtmlStash :
|
||||
self.html_counter = 0 # for counting inline html segments
|
||||
self.rawHtmlBlocks=[]
|
||||
|
||||
def store(self, html) :
|
||||
def store(self, html, safe=False):
|
||||
"""Saves an HTML segment for later reinsertion. Returns a
|
||||
placeholder string that needs to be inserted into the
|
||||
document.
|
||||
|
||||
@param html: an html segment
|
||||
@param safe: label an html segment as safe for safemode
|
||||
@param inline: label a segment as inline html
|
||||
@returns : a placeholder string """
|
||||
self.rawHtmlBlocks.append(html)
|
||||
self.rawHtmlBlocks.append((html, safe))
|
||||
placeholder = HTML_PLACEHOLDER % self.html_counter
|
||||
self.html_counter += 1
|
||||
return placeholder
|
||||
@ -925,6 +994,7 @@ class BlockGuru :
|
||||
remainder of the original list"""
|
||||
|
||||
items = []
|
||||
item = -1
|
||||
|
||||
i = 0 # to keep track of where we are
|
||||
|
||||
@ -1043,31 +1113,30 @@ class Markdown:
|
||||
Markdown text """
|
||||
|
||||
|
||||
def __init__(self, source=None, # deprecated
|
||||
def __init__(self, source=None, # deprecated
|
||||
extensions=[],
|
||||
extension_configs=None,
|
||||
encoding="utf-8",
|
||||
safe_mode = False):
|
||||
"""Creates a new Markdown instance.
|
||||
|
||||
@param source: The text in Markdown format.
|
||||
@param encoding: The character encoding of <text>. """
|
||||
@param source: The text in Markdown format. Deprecated!
|
||||
@param extensions: A list of extensions.
|
||||
@param extension_configs: Configuration setting for extensions.
|
||||
@param safe_mode: Disallow raw html. """
|
||||
|
||||
self.safeMode = safe_mode
|
||||
self.encoding = encoding
|
||||
self.source = source
|
||||
if source is not None:
|
||||
message(WARN, "The `source` arg of Markdown.__init__() is depreciated and will be removed in the future. Use `instance.convert(source)` instead.")
|
||||
self.safeMode = safe_mode
|
||||
self.blockGuru = BlockGuru()
|
||||
self.registeredExtensions = []
|
||||
self.stripTopLevelTags = 1
|
||||
self.docType = ""
|
||||
|
||||
|
||||
self.textPreprocessors = [HTML_BLOCK_PREPROCESSOR]
|
||||
|
||||
self.preprocessors = [
|
||||
HEADER_PREPROCESSOR,
|
||||
self.preprocessors = [HEADER_PREPROCESSOR,
|
||||
LINE_PREPROCESSOR,
|
||||
LINE_BREAKS_PREPROCESSOR,
|
||||
# A footnote preprocessor will
|
||||
# get inserted here
|
||||
REFERENCE_PREPROCESSOR]
|
||||
@ -1076,8 +1145,9 @@ class Markdown:
|
||||
self.postprocessors = [] # a footnote postprocessor will get
|
||||
# inserted later
|
||||
|
||||
self.textPostprocessors = [] # a footnote postprocessor will get
|
||||
# inserted later
|
||||
self.textPostprocessors = [# a footnote postprocessor will get
|
||||
# inserted here
|
||||
RAWHTMLTEXTPOSTPROCESSOR]
|
||||
|
||||
self.prePatterns = []
|
||||
|
||||
@ -1085,13 +1155,15 @@ class Markdown:
|
||||
self.inlinePatterns = [DOUBLE_BACKTICK_PATTERN,
|
||||
BACKTICK_PATTERN,
|
||||
ESCAPE_PATTERN,
|
||||
IMAGE_LINK_PATTERN,
|
||||
IMAGE_REFERENCE_PATTERN,
|
||||
REFERENCE_PATTERN,
|
||||
LINK_ANGLED_PATTERN,
|
||||
LINK_PATTERN,
|
||||
IMAGE_LINK_PATTERN,
|
||||
IMAGE_REFERENCE_PATTERN,
|
||||
AUTOLINK_PATTERN,
|
||||
AUTOMAIL_PATTERN,
|
||||
#LINE_BREAK_PATTERN_2, Removed by Kovid as causes problems with mdx_tables
|
||||
LINE_BREAK_PATTERN,
|
||||
HTML_PATTERN,
|
||||
ENTITY_PATTERN,
|
||||
NOT_STRONG_PATTERN,
|
||||
@ -1121,6 +1193,7 @@ class Markdown:
|
||||
|
||||
try:
|
||||
module = sys.modules[extension_module_name]
|
||||
|
||||
except:
|
||||
message(CRITICAL,
|
||||
"couldn't load extension %s (looking for %s module)"
|
||||
@ -1149,11 +1222,14 @@ class Markdown:
|
||||
self.htmlStash = HtmlStash()
|
||||
|
||||
HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash
|
||||
LINE_PREPROCESSOR.stash = self.htmlStash
|
||||
REFERENCE_PREPROCESSOR.references = self.references
|
||||
HTML_PATTERN.stash = self.htmlStash
|
||||
ENTITY_PATTERN.stash = self.htmlStash
|
||||
REFERENCE_PATTERN.references = self.references
|
||||
IMAGE_REFERENCE_PATTERN.references = self.references
|
||||
RAWHTMLTEXTPOSTPROCESSOR.stash = self.htmlStash
|
||||
RAWHTMLTEXTPOSTPROCESSOR.safeMode = self.safeMode
|
||||
|
||||
for extension in self.registeredExtensions:
|
||||
extension.reset()
|
||||
@ -1173,7 +1249,7 @@ class Markdown:
|
||||
self.doc.appendChild(self.top_element)
|
||||
|
||||
# Fixup the source text
|
||||
text = self.source #.strip()
|
||||
text = self.source
|
||||
text = text.replace("\r\n", "\n").replace("\r", "\n")
|
||||
text += "\n\n"
|
||||
text = text.expandtabs(TAB_LENGTH)
|
||||
@ -1226,7 +1302,9 @@ class Markdown:
|
||||
@param inList: a level
|
||||
@returns: None"""
|
||||
|
||||
# Loop through lines until none left.
|
||||
while lines:
|
||||
|
||||
# Check if this section starts with a list, a blockquote or
|
||||
# a code block
|
||||
|
||||
@ -1257,6 +1335,7 @@ class Markdown:
|
||||
#
|
||||
|
||||
if inList:
|
||||
|
||||
start, lines = self._linesUntil(lines, (lambda line:
|
||||
RE.regExp['ul'].match(line)
|
||||
or RE.regExp['ol'].match(line)
|
||||
@ -1264,15 +1343,25 @@ class Markdown:
|
||||
|
||||
self._processSection(parent_elem, start,
|
||||
inList - 1, looseList = looseList)
|
||||
self._processSection(parent_elem, lines,
|
||||
inList - 1, looseList = looseList)
|
||||
|
||||
inList = inList-1
|
||||
|
||||
else: # Ok, so it's just a simple block
|
||||
|
||||
paragraph, lines = self._linesUntil(lines, lambda line:
|
||||
not line.strip())
|
||||
|
||||
if len(paragraph) and paragraph[0].startswith('#'):
|
||||
self._processHeader(parent_elem, paragraph)
|
||||
|
||||
elif paragraph:
|
||||
self._processParagraph(parent_elem, paragraph,
|
||||
inList, looseList)
|
||||
|
||||
if lines and not lines[0].strip():
|
||||
lines = lines[1:] # skip the first (blank) line
|
||||
|
||||
|
||||
def _processHeader(self, parent_elem, paragraph):
|
||||
m = RE.regExp['header'].match(paragraph[0])
|
||||
if m:
|
||||
level = len(m.group(1))
|
||||
@ -1283,13 +1372,13 @@ class Markdown:
|
||||
else:
|
||||
message(CRITICAL, "We've got a problem header!")
|
||||
|
||||
elif paragraph :
|
||||
|
||||
def _processParagraph(self, parent_elem, paragraph, inList, looseList):
|
||||
list = self._handleInlineWrapper("\n".join(paragraph))
|
||||
|
||||
if ( parent_elem.nodeName == 'li'
|
||||
and not (looseList or parent_elem.childNodes)):
|
||||
|
||||
#and not parent_elem.childNodes) :
|
||||
# If this is the first paragraph inside "li", don't
|
||||
# put <p> around it - append the paragraph bits directly
|
||||
# onto parent_elem
|
||||
@ -1302,10 +1391,6 @@ class Markdown:
|
||||
for item in list:
|
||||
el.appendChild(item)
|
||||
|
||||
if lines and not lines[0].strip():
|
||||
lines = lines[1:] # skip the first (blank) line
|
||||
|
||||
|
||||
|
||||
def _processUList(self, parent_elem, lines, inList):
|
||||
self._processList(parent_elem, lines, inList,
|
||||
@ -1428,15 +1513,22 @@ class Markdown:
|
||||
|
||||
dequoted = []
|
||||
i = 0
|
||||
blank_line = False # allow one blank line between paragraphs
|
||||
for line in lines:
|
||||
m = RE.regExp['quoted'].match(line)
|
||||
if m:
|
||||
dequoted.append(m.group(1))
|
||||
i += 1
|
||||
blank_line = False
|
||||
elif not blank_line and line.strip() != '':
|
||||
dequoted.append(line)
|
||||
i += 1
|
||||
elif not blank_line and line.strip() == '':
|
||||
dequoted.append(line)
|
||||
i += 1
|
||||
blank_line = True
|
||||
else:
|
||||
break
|
||||
else :
|
||||
i += 1
|
||||
|
||||
blockquote = self.doc.createElement('blockquote')
|
||||
parent_elem.appendChild(blockquote)
|
||||
@ -1471,11 +1563,11 @@ class Markdown:
|
||||
|
||||
|
||||
|
||||
def _handleInlineWrapper (self, line) :
|
||||
def _handleInlineWrapper (self, line, patternIndex=0):
|
||||
|
||||
parts = [line]
|
||||
|
||||
for pattern in self.inlinePatterns :
|
||||
while patternIndex < len(self.inlinePatterns):
|
||||
|
||||
i = 0
|
||||
|
||||
@ -1484,7 +1576,9 @@ class Markdown:
|
||||
x = parts[i]
|
||||
|
||||
if isinstance(x, (str, unicode)):
|
||||
result = self._applyPattern(x, pattern)
|
||||
result = self._applyPattern(x, \
|
||||
self.inlinePatterns[patternIndex], \
|
||||
patternIndex)
|
||||
|
||||
if result:
|
||||
i -= 1
|
||||
@ -1493,6 +1587,7 @@ class Markdown:
|
||||
parts.insert(i+1,y)
|
||||
|
||||
i += 1
|
||||
patternIndex += 1
|
||||
|
||||
for i in range(len(parts)):
|
||||
x = parts[i]
|
||||
@ -1521,7 +1616,7 @@ class Markdown:
|
||||
|
||||
return [self.doc.createTextNode(line)]
|
||||
|
||||
def _applyPattern(self, line, pattern) :
|
||||
def _applyPattern(self, line, pattern, patternIndex=0):
|
||||
|
||||
""" Given a pattern name, this function checks if the line
|
||||
fits the pattern, creates the necessary elements, and returns
|
||||
@ -1555,7 +1650,7 @@ class Markdown:
|
||||
for child in node.childNodes:
|
||||
if isinstance(child, TextNode):
|
||||
|
||||
result = self._handleInlineWrapper(child.value)
|
||||
result = self._handleInlineWrapper(child.value, patternIndex+1)
|
||||
|
||||
if result:
|
||||
|
||||
@ -1594,15 +1689,18 @@ class Markdown:
|
||||
"""Return the document in XHTML format.
|
||||
|
||||
@returns: A serialized XHTML body."""
|
||||
#try :
|
||||
|
||||
if source :
|
||||
if source is not None: #Allow blank string
|
||||
self.source = source
|
||||
|
||||
if not self.source:
|
||||
return ""
|
||||
return u""
|
||||
|
||||
self.source = removeBOM(self.source, self.encoding)
|
||||
try:
|
||||
self.source = unicode(self.source)
|
||||
except UnicodeDecodeError:
|
||||
message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
|
||||
return u""
|
||||
|
||||
for pp in self.textPreprocessors:
|
||||
self.source = pp.run(self.source)
|
||||
@ -1610,22 +1708,8 @@ class Markdown:
|
||||
doc = self._transform()
|
||||
xml = doc.toxml()
|
||||
|
||||
#finally:
|
||||
# doc.unlink()
|
||||
|
||||
# Let's stick in all the raw html pieces
|
||||
|
||||
for i in range(self.htmlStash.html_counter) :
|
||||
html = self.htmlStash.rawHtmlBlocks[i]
|
||||
if self.safeMode and html != "<hr />" and html != "<br />":
|
||||
html = HTML_REMOVED_TEXT
|
||||
|
||||
xml = xml.replace("<p>%s\n</p>" % (HTML_PLACEHOLDER % i),
|
||||
html + "\n")
|
||||
xml = xml.replace(HTML_PLACEHOLDER % i,
|
||||
html)
|
||||
|
||||
# And return everything but the top level tag
|
||||
# Return everything but the top level tag
|
||||
|
||||
if self.stripTopLevelTags:
|
||||
xml = xml.strip()[23:-7] + "\n"
|
||||
@ -1636,20 +1720,18 @@ class Markdown:
|
||||
return (self.docType + xml).strip()
|
||||
|
||||
|
||||
__str__ = convert # deprecated - will be changed in 1.7 to report
|
||||
# information about the MD instance
|
||||
def __str__(self):
|
||||
''' Report info about instance. Markdown always returns unicode. '''
|
||||
if self.source is None:
|
||||
status = 'in which no source text has been assinged.'
|
||||
else:
|
||||
status = 'which contains %d chars and %d line(s) of source.'%\
|
||||
(len(self.source), self.source.count('\n')+1)
|
||||
return 'An instance of "%s" %s'% (self.__class__, status)
|
||||
|
||||
toString = __str__ # toString() method is deprecated
|
||||
__unicode__ = convert # markdown should always return a unicode string
|
||||
|
||||
|
||||
def __unicode__(self):
|
||||
"""Return the document in XHTML format as a Unicode object.
|
||||
"""
|
||||
return str(self)#.decode(self.encoding)
|
||||
|
||||
|
||||
toUnicode = __unicode__ # deprecated - will be removed in 1.7
|
||||
|
||||
|
||||
|
||||
|
||||
@ -1662,11 +1744,10 @@ def markdownFromFile(input = None,
|
||||
message_threshold = CRITICAL,
|
||||
safe = False):
|
||||
|
||||
global MESSAGE_THRESHOLD
|
||||
MESSAGE_THRESHOLD = message_threshold
|
||||
|
||||
message(VERBOSE, "input file: %s" % input)
|
||||
global console_hndlr
|
||||
console_hndlr.setLevel(message_threshold)
|
||||
|
||||
message(DEBUG, "input file: %s" % input)
|
||||
|
||||
if not encoding:
|
||||
encoding = "utf-8"
|
||||
@ -1675,7 +1756,9 @@ def markdownFromFile(input = None,
|
||||
text = input_file.read()
|
||||
input_file.close()
|
||||
|
||||
new_text = markdown(text, extensions, encoding, safe_mode = safe)
|
||||
text = removeBOM(text, encoding)
|
||||
|
||||
new_text = markdown(text, extensions, safe_mode = safe)
|
||||
|
||||
if output:
|
||||
output_file = codecs.open(output, "w", encoding=encoding)
|
||||
@ -1687,10 +1770,9 @@ def markdownFromFile(input = None,
|
||||
|
||||
def markdown(text,
|
||||
extensions = [],
|
||||
encoding = None,
|
||||
safe_mode = False):
|
||||
|
||||
message(VERBOSE, "in markdown.markdown(), received text:\n%s" % text)
|
||||
message(DEBUG, "in markdown.markdown(), received text:\n%s" % text)
|
||||
|
||||
extension_names = []
|
||||
extension_configs = {}
|
||||
@ -1764,17 +1846,17 @@ def parse_options() :
|
||||
parser.add_option("-e", "--encoding", dest="encoding",
|
||||
help="encoding for input and output files",)
|
||||
parser.add_option("-q", "--quiet", default = CRITICAL,
|
||||
action="store_const", const=NONE, dest="verbose",
|
||||
action="store_const", const=60, dest="verbose",
|
||||
help="suppress all messages")
|
||||
parser.add_option("-v", "--verbose",
|
||||
action="store_const", const=INFO, dest="verbose",
|
||||
help="print info messages")
|
||||
parser.add_option("-s", "--safe",
|
||||
action="store_const", const=True, dest="safe",
|
||||
help="same mode (strip user's HTML tag)")
|
||||
parser.add_option("-s", "--safe", dest="safe", default=False,
|
||||
metavar="SAFE_MODE",
|
||||
help="same mode ('replace', 'remove' or 'escape' user's HTML tag)")
|
||||
|
||||
parser.add_option("--noisy",
|
||||
action="store_const", const=VERBOSE, dest="verbose",
|
||||
action="store_const", const=DEBUG, dest="verbose",
|
||||
help="print debug messages")
|
||||
parser.add_option("-x", "--extension", action="append", dest="extensions",
|
||||
help = "load extension EXTENSION", metavar="EXTENSION")
|
||||
@ -1799,28 +1881,10 @@ def parse_options() :
|
||||
|
||||
def main():
|
||||
options = parse_options()
|
||||
|
||||
#if os.access(inFile, os.R_OK):
|
||||
|
||||
if not options:
|
||||
sys.exit(0)
|
||||
|
||||
markdownFromFile(**options)
|
||||
|
||||
return 0
|
||||
|
||||
markdownFromFile(**options)
|
||||
|
||||
if __name__ == '__main__':
|
||||
""" Run Markdown from the command line. """
|
||||
sys.exit(main())
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
sys.exit(main)
|
Loading…
x
Reference in New Issue
Block a user