diff --git a/src/libprs500/ebooks/lrf/txt/convert_from.py b/src/libprs500/ebooks/lrf/txt/convert_from.py index 7af7ddb4ce..33a55db4e3 100644 --- a/src/libprs500/ebooks/lrf/txt/convert_from.py +++ b/src/libprs500/ebooks/lrf/txt/convert_from.py @@ -58,11 +58,11 @@ def generate_html(txtfile, encoding, logger): txt = codecs.open(txtfile, 'rb', enc).read() logger.info('Converting text to HTML...') - md = markdown.Markdown(txt, + md = markdown.Markdown( extensions=['footnotes', 'tables', 'toc'], safe_mode=False, ) - html = md.toString() + html = md.convert(txt) p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile)) p.close() codecs.open(p.name, 'wb', 'utf8').write(html) diff --git a/src/libprs500/ebooks/markdown/markdown.py b/src/libprs500/ebooks/markdown/markdown.py index eb7b0b4277..1cb7ce24f5 100644 --- a/src/libprs500/ebooks/markdown/markdown.py +++ b/src/libprs500/ebooks/markdown/markdown.py @@ -1,8 +1,8 @@ #!/usr/bin/env python -version = "1.6b" -version_info = (1,6,2,"rc-2") -__revision__ = "$Rev$" +version = "1.7" +version_info = (1,7,0,"rc-1") +__revision__ = "$Rev: 66 $" """ Python-Markdown @@ -12,7 +12,7 @@ Converts Markdown to HTML. Basic usage as a module: import markdown md = Markdown() - html = markdown.convert(your_text_string) + html = md.convert(your_text_string) See http://www.freewisdom.org/projects/python-markdown/ for more information and instructions on how to extend the functionality of the @@ -20,25 +20,39 @@ script. (You might want to read that before you try modifying this file.) Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and -maintained by [Yuri Takhteyev](http://www.freewisdom.org). +maintained by [Yuri Takhteyev](http://www.freewisdom.org) and [Waylan +Limberg](http://achinghead.com/). 
Contact: yuri [at] freewisdom.org + waylan [at] gmail.com License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD """ -import re, sys, codecs +import re, sys, os, random, codecs + +from logging import getLogger, StreamHandler, Formatter, \ + DEBUG, INFO, WARN, ERROR, CRITICAL -# Set debug level: 3 none, 2 critical, 1 informative, 0 all -(VERBOSE, INFO, CRITICAL, NONE) = range(4) MESSAGE_THRESHOLD = CRITICAL -def message(level, text) : - if level >= MESSAGE_THRESHOLD : - print text + +# Configure debug message logger (the hard way - to support python 2.3) +logger = getLogger('MARKDOWN') +logger.setLevel(DEBUG) # This is restricted by handlers later +console_hndlr = StreamHandler() +formatter = Formatter('%(name)s-%(levelname)s: "%(message)s"') +console_hndlr.setFormatter(formatter) +console_hndlr.setLevel(MESSAGE_THRESHOLD) +logger.addHandler(console_hndlr) + + +def message(level, text): + ''' A wrapper method for logging debug messages. ''' + logger.log(level, text) # --------------- CONSTANTS YOU MIGHT WANT TO MODIFY ----------------- @@ -62,15 +76,15 @@ RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'), # 0780-07BF - Thaana # 07C0-07FF - Nko -BOMS = { 'utf-8' : (unicode(codecs.BOM_UTF8, "utf-8"), ), - 'utf-16' : (unicode(codecs.BOM_UTF16_LE, "utf-16"), - unicode(codecs.BOM_UTF16_BE, "utf-16")), - #'utf-32' : (unicode(codecs.BOM_UTF32_LE, "utf-32"), - # unicode(codecs.BOM_UTF32_BE, "utf-32")), +BOMS = { 'utf-8': (codecs.BOM_UTF8, ), + 'utf-16': (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE), + #'utf-32': (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE) } def removeBOM(text, encoding): + convert = isinstance(text, unicode) for bom in BOMS[encoding]: + bom = convert and bom.decode(encoding) or bom if text.startswith(bom): return text.lstrip(bom) return text @@ -94,7 +108,7 @@ BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table', 'form', 'fieldset', 'iframe', 'math', 'ins', 'del', 'hr', 'hr/', 'style'] -def is_block_level (tag) : +def is_block_level (tag): return ( 
(tag in BLOCK_LEVEL_ELEMENTS) or (tag[0] == 'h' and tag[1] in "0123456789") ) @@ -122,47 +136,47 @@ ENTITY_NORMALIZATION_EXPRESSIONS_SOFT = [ (re.compile("&(?!\#)"), "&"), (re.compile("\""), """)] -def getBidiType(text) : +def getBidiType(text): - if not text : return None + if not text: return None ch = text[0] if not isinstance(ch, unicode) or not ch.isalpha(): return None - else : + else: - for min, max in RTL_BIDI_RANGES : - if ( ch >= min and ch <= max ) : + for min, max in RTL_BIDI_RANGES: + if ( ch >= min and ch <= max ): return "rtl" - else : + else: return "ltr" -class Document : +class Document: - def __init__ (self) : + def __init__ (self): self.bidi = "ltr" - def appendChild(self, child) : + def appendChild(self, child): self.documentElement = child child.isDocumentElement = True child.parent = self self.entities = {} - def setBidi(self, bidi) : - if bidi : + def setBidi(self, bidi): + if bidi: self.bidi = bidi - def createElement(self, tag, textNode=None) : + def createElement(self, tag, textNode=None): el = Element(tag) el.doc = self - if textNode : + if textNode: el.appendChild(self.createTextNode(textNode)) return el - def createTextNode(self, text) : + def createTextNode(self, text): node = TextNode(text) node.doc = self return node @@ -172,51 +186,51 @@ class Document : self.entities[entity] = EntityReference(entity) return self.entities[entity] - def createCDATA(self, text) : + def createCDATA(self, text): node = CDATA(text) node.doc = self return node - def toxml (self) : + def toxml (self): return self.documentElement.toxml() - def normalizeEntities(self, text, avoidDoubleNormalizing=False) : + def normalizeEntities(self, text, avoidDoubleNormalizing=False): - if avoidDoubleNormalizing : + if avoidDoubleNormalizing: regexps = ENTITY_NORMALIZATION_EXPRESSIONS_SOFT - else : + else: regexps = ENTITY_NORMALIZATION_EXPRESSIONS - for regexp, substitution in regexps : + for regexp, substitution in regexps: text = regexp.sub(substitution, text) return 
text - def find(self, test) : + def find(self, test): return self.documentElement.find(test) - def unlink(self) : + def unlink(self): self.documentElement.unlink() self.documentElement = None -class CDATA : +class CDATA: type = "cdata" - def __init__ (self, text) : + def __init__ (self, text): self.text = text - def handleAttributes(self) : + def handleAttributes(self): pass - def toxml (self) : + def toxml (self): return "" -class Element : +class Element: type = "element" - def __init__ (self, tag) : + def __init__ (self, tag): self.nodeName = tag self.attributes = [] @@ -225,10 +239,11 @@ class Element : self.bidi = None self.isDocumentElement = False - def setBidi(self, bidi) : + def setBidi(self, bidi): - if bidi : + if bidi: + orig_bidi = self.bidi if not self.bidi or self.isDocumentElement: # Once the bidi is set don't change it (except for doc element) @@ -236,56 +251,56 @@ class Element : self.parent.setBidi(bidi) - def unlink(self) : - for child in self.childNodes : - if child.type == "element" : + def unlink(self): + for child in self.childNodes: + if child.type == "element": child.unlink() self.childNodes = None - def setAttribute(self, attr, value) : - if not attr in self.attributes : + def setAttribute(self, attr, value): + if not attr in self.attributes: self.attributes.append(attr) self.attribute_values[attr] = value - def insertChild(self, position, child) : + def insertChild(self, position, child): self.childNodes.insert(position, child) child.parent = self - def removeChild(self, child) : + def removeChild(self, child): self.childNodes.remove(child) - def replaceChild(self, oldChild, newChild) : + def replaceChild(self, oldChild, newChild): position = self.childNodes.index(oldChild) self.removeChild(oldChild) self.insertChild(position, newChild) - def appendChild(self, child) : + def appendChild(self, child): self.childNodes.append(child) child.parent = self - def handleAttributes(self) : + def handleAttributes(self): pass - def find(self, test, 
depth=0) : + def find(self, test, depth=0): """ Returns a list of descendants that pass the test function """ matched_nodes = [] - for child in self.childNodes : - if test(child) : + for child in self.childNodes: + if test(child): matched_nodes.append(child) - if child.type == "element" : + if child.type == "element": matched_nodes += child.find(test, depth+1) return matched_nodes def toxml(self): - if ENABLE_ATTRIBUTES : + if ENABLE_ATTRIBUTES: for child in self.childNodes: child.handleAttributes() buffer = "" - if self.nodeName in ['h1', 'h2', 'h3', 'h4'] : + if self.nodeName in ['h1', 'h2', 'h3', 'h4']: buffer += "\n" - elif self.nodeName in ['li'] : + elif self.nodeName in ['li']: buffer += "\n " # Process children FIRST, then do the attributes @@ -294,14 +309,14 @@ class Element : if self.childNodes or self.nodeName in ['blockquote']: childBuffer += ">" - for child in self.childNodes : + for child in self.childNodes: childBuffer += child.toxml() - if self.nodeName == 'p' : + if self.nodeName == 'p': childBuffer += "\n" - elif self.nodeName == 'li' : + elif self.nodeName == 'li': childBuffer += "\n " childBuffer += "%s>" % self.nodeName - else : + else: childBuffer += "/>" @@ -309,18 +324,18 @@ class Element : buffer += "<" + self.nodeName if self.nodeName in ['p', 'li', 'ul', 'ol', - 'h1', 'h2', 'h3', 'h4', 'h5', 'h6'] : + 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']: if not self.attribute_values.has_key("dir"): - if self.bidi : + if self.bidi: bidi = self.bidi - else : + else: bidi = self.doc.bidi - if bidi=="rtl" : + if bidi=="rtl": self.setAttribute("dir", "rtl") - for attr in self.attributes : + for attr in self.attributes: value = self.attribute_values[attr] value = self.doc.normalizeEntities(value, avoidDoubleNormalizing=True) @@ -331,36 +346,36 @@ class Element : buffer += childBuffer - if self.nodeName in ['p', 'li', 'ul', 'ol', + if self.nodeName in ['p', 'br ', 'li', 'ul', 'ol', 'h1', 'h2', 'h3', 'h4'] : buffer += "\n" return buffer -class TextNode : +class 
TextNode: type = "text" attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123} - def __init__ (self, text) : + def __init__ (self, text): self.value = text - def attributeCallback(self, match) : + def attributeCallback(self, match): self.parent.setAttribute(match.group(1), match.group(2)) - def handleAttributes(self) : + def handleAttributes(self): self.value = self.attrRegExp.sub(self.attributeCallback, self.value) - def toxml(self) : + def toxml(self): text = self.value self.parent.setBidi(getBidiType(text)) if not text.startswith(HTML_PLACEHOLDER_PREFIX): - if self.parent.nodeName == "p" : + if self.parent.nodeName == "p": text = text.replace("\n", "\n ") elif (self.parent.nodeName == "li" and self.parent.childNodes[0]==self): @@ -399,7 +414,7 @@ must extend markdown.Preprocessor. """ -class Preprocessor : +class Preprocessor: pass @@ -410,27 +425,27 @@ class HeaderPreprocessor (Preprocessor): the nead for lookahead later. """ - def run (self, lines) : + def run (self, lines): i = -1 - while i+1 < len(lines) : + while i+1 < len(lines): i = i+1 - if not lines[i].strip() : + if not lines[i].strip(): continue - if lines[i].startswith("#") : + if lines[i].startswith("#"): lines.insert(i+1, "\n") if (i+1 <= len(lines) and lines[i+1] - and lines[i+1][0] in ['-', '=']) : + and lines[i+1][0] in ['-', '=']): underline = lines[i+1].strip() - if underline == "="*len(underline) : + if underline == "="*len(underline): lines[i] = "# " + lines[i].strip() lines[i+1] = "" - elif underline == "-"*len(underline) : + elif underline == "-"*len(underline): lines[i] = "## " + lines[i].strip() lines[i+1] = "" @@ -441,21 +456,26 @@ HEADER_PREPROCESSOR = HeaderPreprocessor() class LinePreprocessor (Preprocessor): """Deals with HR lines (needs to be done before processing lists)""" - def run (self, lines) : - for i in range(len(lines)) : - if self._isLine(lines[i]) : - lines[i] = "
%s\n
" % (HTML_PLACEHOLDER % i), + html + "\n") + text = text.replace(HTML_PLACEHOLDER % i, html) + return text + + def escape(self, html): + ''' Basic html escaping ''' + html = html.replace('&', '&') + html = html.replace('<', '<') + html = html.replace('>', '>') + return html.replace('"', '"') + +RAWHTMLTEXTPOSTPROCESSOR = RawHtmlTextPostprocessor() + """ ====================================================================== ========================== MISC AUXILIARY CLASSES ==================== ====================================================================== """ -class HtmlStash : +class HtmlStash: """This class is used for stashing HTML objects that we extract in the beginning and replace with place-holders.""" - def __init__ (self) : + def __init__ (self): self.html_counter = 0 # for counting inline html segments self.rawHtmlBlocks=[] - def store(self, html) : + def store(self, html, safe=False): """Saves an HTML segment for later reinsertion. Returns a placeholder string that needs to be inserted into the document. @param html: an html segment + @param safe: label an html segment as safe for safemode + @param inline: label a segmant as inline html @returns : a placeholder string """ - self.rawHtmlBlocks.append(html) + self.rawHtmlBlocks.append((html, safe)) placeholder = HTML_PLACEHOLDER % self.html_counter self.html_counter += 1 return placeholder -class BlockGuru : +class BlockGuru: - def _findHead(self, lines, fn, allowBlank=0) : + def _findHead(self, lines, fn, allowBlank=0): """Functional magic to help determine boundaries of indented blocks. 
@@ -925,10 +994,11 @@ class BlockGuru : remainder of the original list""" items = [] - + item = -1 + i = 0 # to keep track of where we are - for line in lines : + for line in lines: if not line.strip() and not allowBlank: return items, lines[i:] @@ -938,11 +1008,11 @@ class BlockGuru : i += 1 # Find the next non-blank line - for j in range(i, len(lines)) : - if lines[j].strip() : + for j in range(i, len(lines)): + if lines[j].strip(): next = lines[j] break - else : + else: # There is no more text => this is the end break @@ -950,36 +1020,36 @@ class BlockGuru : part = fn(next) - if part : + if part: items.append("") continue - else : + else: break # found end of the list part = fn(line) - if part : + if part: items.append(part) i += 1 continue - else : + else: return items, lines[i:] - else : + else: i += 1 return items, lines[i:] - def detabbed_fn(self, line) : + def detabbed_fn(self, line): """ An auxiliary method to be passed to _findHead """ m = RE.regExp['tabbed'].match(line) if m: return m.group(4) - else : + else: return None - def detectTabbed(self, lines) : + def detectTabbed(self, lines): return self._findHead(lines, self.detabbed_fn, allowBlank = 1) @@ -990,12 +1060,12 @@ def print_error(string): sys.stderr.write(string +'\n') -def dequote(string) : +def dequote(string): """ Removes quotes from around a string """ if ( ( string.startswith('"') and string.endswith('"')) - or (string.startswith("'") and string.endswith("'")) ) : + or (string.startswith("'") and string.endswith("'")) ): return string[1:-1] - else : + else: return string """ @@ -1008,13 +1078,13 @@ see first if you can do it via pre-processors, post-processors, inline patterns or a combination of the three. 
""" -class CorePatterns : +class CorePatterns: """This class is scheduled for removal as part of a refactoring effort.""" patterns = { 'header': r'(#*)([^#]*)(#*)', # # A title - 'reference-def' : r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)', + 'reference-def': r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)', # [Google]: http://www.google.com/ 'containsline': r'([-]*)$|^([=]*)', # -----, =====, etc. 'ol': r'[ ]{0,3}[\d]*\.\s+(.*)', # 1. text @@ -1023,13 +1093,13 @@ class CorePatterns : 'isline2': r'(\-*)', # --- 'isline3': r'(\_*)', # ___ 'tabbed': r'((\t)|( ))(.*)', # an indented line - 'quoted' : r'> ?(.*)', # a quoted block ("> ...") + 'quoted': r'> ?(.*)', # a quoted block ("> ...") } - def __init__ (self) : + def __init__ (self): self.regExp = {} - for key in self.patterns.keys() : + for key in self.patterns.keys(): self.regExp[key] = re.compile("^%s$" % self.patterns[key], re.DOTALL) @@ -1043,66 +1113,68 @@ class Markdown: Markdown text """ - def __init__(self, source=None, # deprecated + def __init__(self, source=None, # depreciated extensions=[], extension_configs=None, - encoding="utf-8", safe_mode = False): """Creates a new Markdown instance. - @param source: The text in Markdown format. 
- @param encoding: The character encoding ofaround it - append the paragraph bits directly - # onto parent_elem - el = parent_elem - else : - # Otherwise make a "p" element - el = self.doc.createElement("p") - parent_elem.appendChild(el) - - for item in list : - el.appendChild(item) - + not line.strip()) + + if len(paragraph) and paragraph[0].startswith('#'): + self._processHeader(parent_elem, paragraph) + + elif paragraph: + self._processParagraph(parent_elem, paragraph, + inList, looseList) + if lines and not lines[0].strip(): lines = lines[1:] # skip the first (blank) line - - def _processUList(self, parent_elem, lines, inList) : + def _processHeader(self, parent_elem, paragraph): + m = RE.regExp['header'].match(paragraph[0]) + if m: + level = len(m.group(1)) + h = self.doc.createElement("h%d" % level) + parent_elem.appendChild(h) + for item in self._handleInlineWrapper(m.group(2).strip()): + h.appendChild(item) + else: + message(CRITICAL, "We've got a problem header!") + + + def _processParagraph(self, parent_elem, paragraph, inList, looseList): + list = self._handleInlineWrapper("\n".join(paragraph)) + + if ( parent_elem.nodeName == 'li' + and not (looseList or parent_elem.childNodes)): + + # If this is the first paragraph inside "li", don't + # put
around it - append the paragraph bits directly + # onto parent_elem + el = parent_elem + else: + # Otherwise make a "p" element + el = self.doc.createElement("p") + parent_elem.appendChild(el) + + for item in list: + el.appendChild(item) + + + def _processUList(self, parent_elem, lines, inList): self._processList(parent_elem, lines, inList, listexpr='ul', tag = 'ul') - def _processOList(self, parent_elem, lines, inList) : + def _processOList(self, parent_elem, lines, inList): self._processList(parent_elem, lines, inList, listexpr='ol', tag = 'ol') - def _processList(self, parent_elem, lines, inList, listexpr, tag) : + def _processList(self, parent_elem, lines, inList, listexpr, tag): """Given a list of document lines starting with a list item, finds the end of the list, breaks it up, and recursively processes each list item and the remainder of the text file. @@ -1337,20 +1422,20 @@ class Markdown: i = 0 # a counter to keep track of where we are - for line in lines : + for line in lines: loose = 0 - if not line.strip() : + if not line.strip(): # If we see a blank line, this _might_ be the end of the list i += 1 loose = 1 # Find the next non-blank line - for j in range(i, len(lines)) : - if lines[j].strip() : + for j in range(i, len(lines)): + if lines[j].strip(): next = lines[j] break - else : + else: # There is no more text => end of the list break @@ -1362,7 +1447,7 @@ class Markdown: items[item].append(line.strip()) looseList = loose or looseList continue - else : + else: break # found end of the list # Now we need to detect list items (at the current level) @@ -1371,26 +1456,26 @@ class Markdown: for expr in ['ul', 'ol', 'tabbed']: m = RE.regExp[expr].match(line) - if m : - if expr in ['ul', 'ol'] : # We are looking at a new item + if m: + if expr in ['ul', 'ol']: # We are looking at a new item #if m.group(1) : # Removed the check to allow for a blank line # at the beginning of the list item items.append([m.group(1)]) item += 1 - elif expr == 'tabbed' : # This 
line needs to be detabbed + elif expr == 'tabbed': # This line needs to be detabbed items[item].append(m.group(4)) #after the 'tab' i += 1 break - else : + else: items[item].append(line) # Just regular continuation i += 1 # added on 2006.02.25 - else : + else: i += 1 # Add the dom elements - for item in items : + for item in items: li = self.doc.createElement("li") ul.appendChild(li) @@ -1401,21 +1486,21 @@ class Markdown: self._processSection(parent_elem, lines[i:], inList) - def _linesUntil(self, lines, condition) : + def _linesUntil(self, lines, condition): """ A utility function to break a list of lines upon the first line that satisfied a condition. The condition argument should be a predicate function. """ i = -1 - for line in lines : + for line in lines: i += 1 - if condition(line) : break - else : + if condition(line): break + else: i += 1 return lines[:i], lines[i:] - def _processQuote(self, parent_elem, lines, inList) : + def _processQuote(self, parent_elem, lines, inList): """Given a list of document lines starting with a quote finds the end of the quote, unindents it and recursively processes the body of the quote and the remainder of the @@ -1428,15 +1513,22 @@ class Markdown: dequoted = [] i = 0 - for line in lines : + blank_line = False # allow one blank line between paragraphs + for line in lines: m = RE.regExp['quoted'].match(line) - if m : + if m: dequoted.append(m.group(1)) i += 1 - else : + blank_line = False + elif not blank_line and line.strip() != '': + dequoted.append(line) + i += 1 + elif not blank_line and line.strip() == '': + dequoted.append(line) + i += 1 + blank_line = True + else: break - else : - i += 1 blockquote = self.doc.createElement('blockquote') parent_elem.appendChild(blockquote) @@ -1447,7 +1539,7 @@ class Markdown: - def _processCodeBlock(self, parent_elem, lines, inList) : + def _processCodeBlock(self, parent_elem, lines, inList): """Given a list of document lines starting with a code block finds the end of the block, puts 
it into the dom verbatim wrapped in ("
") and recursively processes the
@@ -1471,32 +1563,35 @@ class Markdown:
- def _handleInlineWrapper (self, line) :
+ def _handleInlineWrapper (self, line, patternIndex=0):
parts = [line]
- for pattern in self.inlinePatterns :
+ while patternIndex < len(self.inlinePatterns):
i = 0
- while i < len(parts) :
+ while i < len(parts):
x = parts[i]
- if isinstance(x, (str, unicode)) :
- result = self._applyPattern(x, pattern)
+ if isinstance(x, (str, unicode)):
+ result = self._applyPattern(x, \
+ self.inlinePatterns[patternIndex], \
+ patternIndex)
- if result :
+ if result:
i -= 1
parts.remove(x)
- for y in result :
+ for y in result:
parts.insert(i+1,y)
i += 1
+ patternIndex += 1
- for i in range(len(parts)) :
+ for i in range(len(parts)):
x = parts[i]
- if isinstance(x, (str, unicode)) :
+ if isinstance(x, (str, unicode)):
parts[i] = self.doc.createTextNode(x)
return parts
@@ -1515,13 +1610,13 @@ class Markdown:
if not(line):
return [self.doc.createTextNode(' ')]
- for pattern in self.inlinePatterns :
+ for pattern in self.inlinePatterns:
list = self._applyPattern( line, pattern)
if list: return list
return [self.doc.createTextNode(line)]
- def _applyPattern(self, line, pattern) :
+ def _applyPattern(self, line, pattern, patternIndex=0):
""" Given a pattern name, this function checks if the line
fits the pattern, creates the necessary elements, and returns
@@ -1540,7 +1635,7 @@ class Markdown:
m = pattern.getCompiledRegExp().match(line)
- if not m :
+ if not m:
return None
# if we got a match let the pattern make us a NanoDom node
@@ -1551,15 +1646,15 @@ class Markdown:
if isinstance(node, Element):
- if not node.nodeName in ["code", "pre"] :
- for child in node.childNodes :
+ if not node.nodeName in ["code", "pre"]:
+ for child in node.childNodes:
if isinstance(child, TextNode):
- result = self._handleInlineWrapper(child.value)
+ result = self._handleInlineWrapper(child.value, patternIndex+1)
if result:
- if result == [child] :
+ if result == [child]:
continue
result.reverse()
@@ -1581,74 +1676,61 @@ class Markdown:
- if node :
+ if node:
# Those are in the reverse order!
return ( m.groups()[-1], # the string to the left
node, # the new node
m.group(1)) # the string to the right of the match
- else :
+ else:
return None
def convert (self, source = None):
"""Return the document in XHTML format.
@returns: A serialized XHTML body."""
- #try :
- if source :
+ if source is not None: #Allow blank string
self.source = source
- if not self.source :
- return ""
+ if not self.source:
+ return u""
- self.source = removeBOM(self.source, self.encoding)
+ try:
+ self.source = unicode(self.source)
+ except UnicodeDecodeError:
+ message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
+ return u""
for pp in self.textPreprocessors:
self.source = pp.run(self.source)
-
+
doc = self._transform()
xml = doc.toxml()
- #finally:
- # doc.unlink()
- # Let's stick in all the raw html pieces
+ # Return everything but the top level tag
- for i in range(self.htmlStash.html_counter) :
- html = self.htmlStash.rawHtmlBlocks[i]
- if self.safeMode and html != "
" and html != "
":
- html = HTML_REMOVED_TEXT
-
- xml = xml.replace("%s\n
" % (HTML_PLACEHOLDER % i),
- html + "\n")
- xml = xml.replace(HTML_PLACEHOLDER % i,
- html)
-
- # And return everything but the top level tag
-
- if self.stripTopLevelTags :
+ if self.stripTopLevelTags:
xml = xml.strip()[23:-7] + "\n"
- for pp in self.textPostprocessors :
+ for pp in self.textPostprocessors:
xml = pp.run(xml)
return (self.docType + xml).strip()
- __str__ = convert # deprecated - will be changed in 1.7 to report
- # information about the MD instance
-
- toString = __str__ # toString() method is deprecated
+ def __str__(self):
+ ''' Return a short description of this instance: its class and the size of its source text (not the converted output). '''
+ if self.source is None:
+ status = 'in which no source text has been assigned.'
+ else:
+ status = 'which contains %d chars and %d line(s) of source.'%\
+ (len(self.source), self.source.count('\n')+1)
+ return 'An instance of "%s" %s'% (self.__class__, status)
+ __unicode__ = convert # markdown should always return a unicode string
- def __unicode__(self):
- """Return the document in XHTML format as a Unicode object.
- """
- return str(self)#.decode(self.encoding)
-
-
- toUnicode = __unicode__ # deprecated - will be removed in 1.7
@@ -1660,46 +1742,46 @@ def markdownFromFile(input = None,
extensions = [],
encoding = None,
message_threshold = CRITICAL,
- safe = False) :
+ safe = False):
- global MESSAGE_THRESHOLD
- MESSAGE_THRESHOLD = message_threshold
+ global console_hndlr
+ console_hndlr.setLevel(message_threshold)
- message(VERBOSE, "input file: %s" % input)
+ message(DEBUG, "input file: %s" % input)
-
- if not encoding :
+ if not encoding:
encoding = "utf-8"
input_file = codecs.open(input, mode="r", encoding=encoding)
text = input_file.read()
input_file.close()
- new_text = markdown(text, extensions, encoding, safe_mode = safe)
+ text = removeBOM(text, encoding)
- if output :
+ new_text = markdown(text, extensions, safe_mode = safe)
+
+ if output:
output_file = codecs.open(output, "w", encoding=encoding)
output_file.write(new_text)
output_file.close()
- else :
+ else:
sys.stdout.write(new_text.encode(encoding))
def markdown(text,
extensions = [],
- encoding = None,
- safe_mode = False) :
+ safe_mode = False):
- message(VERBOSE, "in markdown.markdown(), received text:\n%s" % text)
+ message(DEBUG, "in markdown.markdown(), received text:\n%s" % text)
extension_names = []
extension_configs = {}
- for ext in extensions :
+ for ext in extensions:
pos = ext.find("(")
- if pos == -1 :
+ if pos == -1:
extension_names.append(ext)
- else :
+ else:
name = ext[:pos]
extension_names.append(name)
pairs = [x.split("=") for x in ext[pos+1:-1].split(",")]
@@ -1713,21 +1795,21 @@ def markdown(text,
return md.convert(text)
-class Extension :
+class Extension:
- def __init__(self, configs = {}) :
+ def __init__(self, configs = {}):
self.config = configs
- def getConfig(self, key) :
- if self.config.has_key(key) :
+ def getConfig(self, key):
+ if self.config.has_key(key):
return self.config[key][0]
- else :
+ else:
return ""
- def getConfigInfo(self) :
+ def getConfigInfo(self):
return [(key, self.config[key][1]) for key in self.config.keys()]
- def setConfig(self, key, value) :
+ def setConfig(self, key, value):
self.config[key][0] = value
@@ -1739,20 +1821,20 @@ For lower versions of Python use:
""" % EXECUTABLE_NAME_FOR_USAGE
-def parse_options() :
+def parse_options():
- try :
+ try:
optparse = __import__("optparse")
- except :
- if len(sys.argv) == 2 :
- return {'input' : sys.argv[1],
- 'output' : None,
- 'message_threshold' : CRITICAL,
- 'safe' : False,
- 'extensions' : [],
- 'encoding' : None }
+ except:
+ if len(sys.argv) == 2:
+ return {'input': sys.argv[1],
+ 'output': None,
+ 'message_threshold': CRITICAL,
+ 'safe': False,
+ 'extensions': [],
+ 'encoding': None }
- else :
+ else:
print OPTPARSE_WARNING
return None
@@ -1764,63 +1846,45 @@ def parse_options() :
parser.add_option("-e", "--encoding", dest="encoding",
help="encoding for input and output files",)
parser.add_option("-q", "--quiet", default = CRITICAL,
- action="store_const", const=NONE, dest="verbose",
+ action="store_const", const=60, dest="verbose",
help="suppress all messages")
parser.add_option("-v", "--verbose",
action="store_const", const=INFO, dest="verbose",
help="print info messages")
- parser.add_option("-s", "--safe",
- action="store_const", const=True, dest="safe",
- help="same mode (strip user's HTML tag)")
+ parser.add_option("-s", "--safe", dest="safe", default=False,
+ metavar="SAFE_MODE",
+ help="same mode ('replace', 'remove' or 'escape' user's HTML tag)")
parser.add_option("--noisy",
- action="store_const", const=VERBOSE, dest="verbose",
+ action="store_const", const=DEBUG, dest="verbose",
help="print debug messages")
parser.add_option("-x", "--extension", action="append", dest="extensions",
help = "load extension EXTENSION", metavar="EXTENSION")
(options, args) = parser.parse_args()
- if not len(args) == 1 :
+ if not len(args) == 1:
parser.print_help()
return None
- else :
+ else:
input_file = args[0]
- if not options.extensions :
+ if not options.extensions:
options.extensions = []
- return {'input' : input_file,
- 'output' : options.filename,
- 'message_threshold' : options.verbose,
- 'safe' : options.safe,
- 'extensions' : options.extensions,
- 'encoding' : options.encoding }
+ return {'input': input_file,
+ 'output': options.filename,
+ 'message_threshold': options.verbose,
+ 'safe': options.safe,
+ 'extensions': options.extensions,
+ 'encoding': options.encoding }
def main():
options = parse_options()
-
- #if os.access(inFile, os.R_OK):
-
- if not options :
- sys.exit(0)
-
+ if not options:
+ return 0
markdownFromFile(**options)
-
- return 0
-
if __name__ == '__main__':
""" Run Markdown from the command line. """
- sys.exit(main())
-
-
-
-
-
-
-
-
-
-
-
+ sys.exit(main())
\ No newline at end of file