Fix #392

2025-07-31 14:33:54 -04:00 · 2007-12-27 23:32:16 +00:00 · 2007-12-27 23:32:16 +00:00 · f571a7b603
commit f571a7b603
parent 42a90b9515
2 changed files with 497 additions and 433 deletions
--- a/src/libprs500/ebooks/lrf/txt/convert_from.py
+++ b/src/libprs500/ebooks/lrf/txt/convert_from.py
@ -58,11 +58,11 @@ def generate_html(txtfile, encoding, logger):
        txt = codecs.open(txtfile, 'rb', enc).read()
    logger.info('Converting text to HTML...')
-    md = markdown.Markdown(txt,
+    md = markdown.Markdown(
                       extensions=['footnotes', 'tables', 'toc'],
                       safe_mode=False,
                       )
-    html = md.toString()
+    html = md.convert(txt)
    p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile))
    p.close()
    codecs.open(p.name, 'wb', 'utf8').write(html)
--- a/src/libprs500/ebooks/markdown/markdown.py
+++ b/src/libprs500/ebooks/markdown/markdown.py
@ -1,8 +1,8 @@
 #!/usr/bin/env python
-version = "1.6b"
+version = "1.7"
-version_info = (1,6,2,"rc-2")
+version_info = (1,7,0,"rc-1")
-__revision__ = "$Rev$"
+__revision__ = "$Rev: 66 $"
 """
 Python-Markdown
@ -12,7 +12,7 @@ Converts Markdown to HTML.  Basic usage as a module:
    import markdown
    md = Markdown()
-    html = markdown.convert(your_text_string)
+    html = md.convert(your_text_string)
 See http://www.freewisdom.org/projects/python-markdown/ for more
 information and instructions on how to extend the functionality of the
@ -20,25 +20,39 @@ script.  (You might want to read that before you try modifying this
 file.)
 Started by [Manfred Stienstra](http://www.dwerg.net/).  Continued and
-maintained  by [Yuri Takhteyev](http://www.freewisdom.org).
+maintained  by [Yuri Takhteyev](http://www.freewisdom.org) and [Waylan
 Limberg](http://achinghead.com/).
 Contact: yuri [at] freewisdom.org
         waylan [at] gmail.com
 License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD
 """
-import re, sys, codecs
+import re, sys, os, random, codecs
 from logging import getLogger, StreamHandler, Formatter, \
                    DEBUG, INFO, WARN, ERROR, CRITICAL
 # Set debug level: 3 none, 2 critical, 1 informative, 0 all
 (VERBOSE, INFO, CRITICAL, NONE) = range(4)
 MESSAGE_THRESHOLD = CRITICAL
 # Configure debug message logger (the hard way - to support python 2.3)
 logger = getLogger('MARKDOWN')
 logger.setLevel(DEBUG) # This is restricted by handlers later
 console_hndlr = StreamHandler()
 formatter = Formatter('%(name)s-%(levelname)s: "%(message)s"')
 console_hndlr.setFormatter(formatter)
 console_hndlr.setLevel(MESSAGE_THRESHOLD)
 logger.addHandler(console_hndlr)
 def message(level, text):
-    if level >= MESSAGE_THRESHOLD :
+    ''' A wrapper method for logging debug messages. '''
-        print text
+    logger.log(level, text)
 # --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------
@ -62,15 +76,15 @@ RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
 # 0780-07BF - Thaana
 # 07C0-07FF - Nko
-BOMS = { 'utf-8' : (unicode(codecs.BOM_UTF8, "utf-8"), ),
+BOMS = { 'utf-8': (codecs.BOM_UTF8, ),
-         'utf-16' : (unicode(codecs.BOM_UTF16_LE, "utf-16"),
+         'utf-16': (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE),
-                     unicode(codecs.BOM_UTF16_BE, "utf-16")),
+         #'utf-32': (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)
         #'utf-32' : (unicode(codecs.BOM_UTF32_LE, "utf-32"),
         #            unicode(codecs.BOM_UTF32_BE, "utf-32")),
         }
 def removeBOM(text, encoding):
    convert = isinstance(text, unicode)
    for bom in BOMS[encoding]:
        bom = convert and bom.decode(encoding) or bom
        if text.startswith(bom):
            return text.lstrip(bom)
    return text
@ -229,6 +243,7 @@ class Element :
        if bidi:
            orig_bidi = self.bidi
            if not self.bidi or self.isDocumentElement:
                # Once the bidi is set don't change it (except for doc element)
@ -331,7 +346,7 @@ class Element :
        buffer += childBuffer
-        if self.nodeName in ['p', 'li', 'ul', 'ol',
+        if self.nodeName in ['p', 'br ', 'li', 'ul', 'ol',
                             'h1', 'h2', 'h3', 'h4'] :
            buffer += "\n"
@ -441,14 +456,19 @@ HEADER_PREPROCESSOR = HeaderPreprocessor()
 class LinePreprocessor (Preprocessor):
    """Deals with HR lines (needs to be done before processing lists)"""
    blockquote_re = re.compile(r'^(> )+')
    def run (self, lines):
        for i in range(len(lines)):
-            if self._isLine(lines[i]) :
+            prefix = ''
-                lines[i] = "<hr />"
+            m = self.blockquote_re.search(lines[i])
            if m : prefix = m.group(0)
            if self._isLine(lines[i][len(prefix):]):
                lines[i] = prefix + self.stash.store("<hr />", safe=True)
        return lines
    def _isLine(self, block):
-        """Determines if a block should be replaced with an <HR>"""
+        """Determines if a block should be replaced with an <HR>"""
        if block.startswith("    "): return 0  # a code block
        text = "".join([x for x in block if not x.isspace()])
        if len(text) <= 2:
@ -463,19 +483,6 @@ class LinePreprocessor (Preprocessor):
 LINE_PREPROCESSOR = LinePreprocessor()
 class LineBreaksPreprocessor (Preprocessor):
    """Replaces double spaces at the end of the lines with <br/ >."""
    def run (self, lines) :
        for i in range(len(lines)) :
            if (lines[i].endswith("  ")
                and not RE.regExp['tabbed'].match(lines[i]) ):
                lines[i] += "<br />"
        return lines
 LINE_BREAKS_PREPROCESSOR = LineBreaksPreprocessor()
 class HtmlBlockPreprocessor (Preprocessor):
    """Removes html blocks from self.lines"""
@ -507,7 +514,6 @@ class HtmlBlockPreprocessor (Preprocessor):
    def run (self, text):
        new_blocks = []
        #text = "\n".join(lines)
        text = text.split("\n\n")
        items = []
@ -570,7 +576,7 @@ class HtmlBlockPreprocessor (Preprocessor):
            new_blocks.append(self.stash.store('\n\n'.join(items)))
            new_blocks.append('\n')
-        return "\n\n".join(new_blocks)   #.split("\n")
+        return "\n\n".join(new_blocks)
 HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor()
@ -648,9 +654,10 @@ So, we apply the expressions in the following order:
 NOBRACKET = r'[^\]\[]*'
 BRK = ( r'\[('
-        + (NOBRACKET + r'(\['+NOBRACKET)*6
+        + (NOBRACKET + r'(\[')*6
-        + (NOBRACKET+ r'\])*'+NOBRACKET)*6
+        + (NOBRACKET+ r'\])*')*6
        + NOBRACKET + r')\]' )
 NOIMG = r'(?<!\!)'
 BACKTICK_RE = r'\`([^\`]*)\`'                    # `e= m*c^2`
 DOUBLE_BACKTICK_RE =  r'\`\`(.*)\`\`'            # ``e=f("`")``
@ -667,10 +674,10 @@ else :
 STRONG_2_RE = r'__([^_]*)__'                     # __strong__
 STRONG_EM_2_RE = r'___([^_]*)___'                # ___strong___
-LINK_RE = BRK + r'\s*\(([^\)]*)\)'               # [text](url)
+LINK_RE = NOIMG + BRK + r'\s*\(([^\)]*)\)'               # [text](url)
-LINK_ANGLED_RE = BRK + r'\s*\(<([^\)]*)>\)'      # [text](<url>)
+LINK_ANGLED_RE = NOIMG + BRK + r'\s*\(<([^\)]*)>\)'      # [text](<url>)
 IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(([^\)]*)\)' # ![alttxt](http://x.com/)
-REFERENCE_RE = BRK+ r'\s*\[([^\]]*)\]'           # [Google][3]
+REFERENCE_RE = NOIMG + BRK+ r'\s*\[([^\]]*)\]'           # [Google][3]
 IMAGE_REFERENCE_RE = r'\!' + BRK + '\s*\[([^\]]*)\]' # ![alt text][2]
 NOT_STRONG_RE = r'( \* )'                        # stand-alone * or _
 AUTOLINK_RE = r'<(http://[^>]*)>'                # <http://www.123.com>
@ -678,6 +685,8 @@ AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'               # <me@example.com>
 #HTML_RE = r'(\<[^\>]*\>)'                        # <...>
 HTML_RE = r'(\<[a-zA-Z/][^\>]*\>)'               # <...>
 ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'                # &amp;
 LINE_BREAK_RE = r'  \n'                     # two spaces at end of line
 LINE_BREAK_2_RE = r'  $'                    # two spaces at end of text
 class Pattern:
@ -706,6 +715,11 @@ class SimpleTagPattern (Pattern):
        el.appendChild(doc.createTextNode(m.group(2)))
        return el
 class SubstituteTagPattern (SimpleTagPattern):
    def handleMatch (self, m, doc):
        return doc.createElement(self.tag)
 class BacktickPattern (Pattern):
    def __init__ (self, pattern):
@ -734,7 +748,9 @@ class DoubleTagPattern (SimpleTagPattern) :
 class HtmlPattern (Pattern):
    def handleMatch (self, m, doc):
-        place_holder = self.stash.store(m.group(2))
+        rawhtml = m.group(2)
        inline = True
        place_holder = self.stash.store(rawhtml)
        return doc.createTextNode(place_holder)
@ -762,7 +778,10 @@ class ImagePattern (Pattern):
    def handleMatch(self, m, doc):
        el = doc.createElement('img')
        src_parts = m.group(9).split()
        if src_parts:
            el.setAttribute('src', src_parts[0])
        else:
            el.setAttribute('src', "")
        if len(src_parts) > 1:
            el.setAttribute('title', dequote(" ".join(src_parts[1:])))
        if ENABLE_ATTRIBUTES:
@ -849,6 +868,9 @@ EMPHASIS_PATTERN_2      = SimpleTagPattern(EMPHASIS_2_RE, 'em')
 STRONG_EM_PATTERN       = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
 STRONG_EM_PATTERN_2     = DoubleTagPattern(STRONG_EM_2_RE, 'strong,em')
 LINE_BREAK_PATTERN      = SubstituteTagPattern(LINE_BREAK_RE, 'br ')
 LINE_BREAK_PATTERN_2    = SubstituteTagPattern(LINE_BREAK_2_RE, 'br ')
 LINK_PATTERN            = LinkPattern(LINK_RE)
 LINK_ANGLED_PATTERN     = LinkPattern(LINK_ANGLED_RE)
 IMAGE_LINK_PATTERN      = ImagePattern(IMAGE_LINK_RE)
@ -882,6 +904,51 @@ class Postprocessor :
    pass
 """
 ======================================================================
 ======================== TEXT-POST-PROCESSORS ========================
 ======================================================================
 Markdown also allows text-post-processors, which are similar to
 text-preprocessors in that they need to implement a "run" method.
 Unlike post-processors, they take a text string as a parameter and 
 should return a string.
 Text-Post-Processors should extend markdown.Postprocessor.
 """
 class RawHtmlTextPostprocessor(Postprocessor):
    def __init__(self):
        pass
    def run(self, text):
        for i in range(self.stash.html_counter):
            html, safe  = self.stash.rawHtmlBlocks[i]
            if self.safeMode and not safe:
                if str(self.safeMode).lower() == 'escape':
                    html = self.escape(html)
                elif str(self.safeMode).lower() == 'remove':
                    html = ''
                else:
                    html = HTML_REMOVED_TEXT
            text = text.replace("<p>%s\n</p>" % (HTML_PLACEHOLDER % i),
                              html + "\n")
            text =  text.replace(HTML_PLACEHOLDER % i, html)
        return text
    def escape(self, html):
        ''' Basic html escaping '''
        html = html.replace('&', '&amp;')
        html = html.replace('<', '&lt;')
        html = html.replace('>', '&gt;')
        return html.replace('"', '&quot;')
 RAWHTMLTEXTPOSTPROCESSOR = RawHtmlTextPostprocessor()
 """
 ======================================================================
 ========================== MISC AUXILIARY CLASSES ====================
@ -896,14 +963,16 @@ class HtmlStash :
        self.html_counter = 0 # for counting inline html segments
        self.rawHtmlBlocks=[]
-    def store(self, html) :
+    def store(self, html, safe=False):
        """Saves an HTML segment for later reinsertion.  Returns a
           placeholder string that needs to be inserted into the
           document.
           @param html: an html segment
           @param safe: label an html segment as safe for safemode
           @param inline: label a segment as inline html
           @returns : a placeholder string """
-        self.rawHtmlBlocks.append(html)
+        self.rawHtmlBlocks.append((html, safe))
        placeholder = HTML_PLACEHOLDER % self.html_counter
        self.html_counter += 1
        return placeholder
@ -925,6 +994,7 @@ class BlockGuru :
                      remainder of the original list"""
        items = []
        item = -1
        i = 0 # to keep track of where we are
@ -1043,31 +1113,30 @@ class Markdown:
        Markdown text """
-    def __init__(self, source=None,  # deprecated
+    def __init__(self, source=None,  # deprecated
                 extensions=[],
                 extension_configs=None,
                 encoding="utf-8",
                 safe_mode = False):
        """Creates a new Markdown instance.
-           @param source: The text in Markdown format.
+           @param source: The text in Markdown format. Deprecated!
-           @param encoding: The character encoding of <text>. """
+           @param extensions: A list of extensions.
           @param extension_configs: Configuration setting for extensions.
           @param safe_mode: Disallow raw html. """
        self.safeMode = safe_mode
        self.encoding = encoding
        self.source = source
        if source is not None:
            message(WARN, "The `source` arg of Markdown.__init__() is depreciated and will be removed in the future. Use `instance.convert(source)` instead.")
        self.safeMode = safe_mode
        self.blockGuru = BlockGuru()
        self.registeredExtensions = []
        self.stripTopLevelTags = 1
        self.docType = ""
        self.textPreprocessors = [HTML_BLOCK_PREPROCESSOR]
-        self.preprocessors = [ 
+        self.preprocessors = [HEADER_PREPROCESSOR,
                               HEADER_PREPROCESSOR,
                              LINE_PREPROCESSOR,
                               LINE_BREAKS_PREPROCESSOR,
                              # A footnote preprocessor will
                              # get inserted here
                              REFERENCE_PREPROCESSOR]
@ -1076,8 +1145,9 @@ class Markdown:
        self.postprocessors = [] # a footnote postprocessor will get
                                 # inserted later
-        self.textPostprocessors = [] # a footnote postprocessor will get
+        self.textPostprocessors = [# a footnote postprocessor will get
-                                     # inserted later                                 
+                                   # inserted here
                                   RAWHTMLTEXTPOSTPROCESSOR]
        self.prePatterns = []
@ -1085,13 +1155,15 @@ class Markdown:
        self.inlinePatterns = [DOUBLE_BACKTICK_PATTERN,
                               BACKTICK_PATTERN,
                               ESCAPE_PATTERN,
                                IMAGE_LINK_PATTERN,
                                IMAGE_REFERENCE_PATTERN,
                               REFERENCE_PATTERN,
                               LINK_ANGLED_PATTERN,
                               LINK_PATTERN,
                               IMAGE_LINK_PATTERN,
 			                   IMAGE_REFERENCE_PATTERN,
 			                   AUTOLINK_PATTERN,
                               AUTOMAIL_PATTERN,
                               #LINE_BREAK_PATTERN_2, Removed by Kovid as causes problems with mdx_tables
                               LINE_BREAK_PATTERN,
                               HTML_PATTERN,
                               ENTITY_PATTERN,
                               NOT_STRONG_PATTERN,
@ -1121,6 +1193,7 @@ class Markdown:
            try:
                module = sys.modules[extension_module_name]
            except:
                message(CRITICAL,
                        "couldn't load extension %s (looking for %s module)"
@ -1149,11 +1222,14 @@ class Markdown:
        self.htmlStash = HtmlStash()
        HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash
        LINE_PREPROCESSOR.stash = self.htmlStash
        REFERENCE_PREPROCESSOR.references = self.references
        HTML_PATTERN.stash = self.htmlStash
        ENTITY_PATTERN.stash = self.htmlStash
        REFERENCE_PATTERN.references = self.references
        IMAGE_REFERENCE_PATTERN.references = self.references
        RAWHTMLTEXTPOSTPROCESSOR.stash = self.htmlStash
        RAWHTMLTEXTPOSTPROCESSOR.safeMode = self.safeMode
        for extension in self.registeredExtensions:
            extension.reset()
@ -1173,7 +1249,7 @@ class Markdown:
        self.doc.appendChild(self.top_element)
        # Fixup the source text
-        text = self.source #.strip()
+        text = self.source
        text = text.replace("\r\n", "\n").replace("\r", "\n")
        text += "\n\n"
        text = text.expandtabs(TAB_LENGTH)
@ -1226,7 +1302,9 @@ class Markdown:
           @param inList: a level
           @returns: None"""
        # Loop through lines until none left.
        while lines:
            # Check if this section starts with a list, a blockquote or
            # a code block
@ -1257,6 +1335,7 @@ class Markdown:
            #
            if inList:
                start, lines  = self._linesUntil(lines, (lambda line:
                                 RE.regExp['ul'].match(line)
                                 or RE.regExp['ol'].match(line)
@ -1264,15 +1343,25 @@ class Markdown:
                self._processSection(parent_elem, start,
                                     inList - 1, looseList = looseList)
-                self._processSection(parent_elem, lines,
+                inList = inList-1
                                     inList - 1, looseList = looseList)
            else: # Ok, so it's just a simple block
                paragraph, lines = self._linesUntil(lines, lambda line:
                                                     not line.strip())
                if len(paragraph) and paragraph[0].startswith('#'):
                    self._processHeader(parent_elem, paragraph)
                elif paragraph:
                    self._processParagraph(parent_elem, paragraph,
                                          inList, looseList)
            if lines and not lines[0].strip():
                lines = lines[1:]  # skip the first (blank) line
    def _processHeader(self, parent_elem, paragraph):
        m = RE.regExp['header'].match(paragraph[0])
        if m:
            level = len(m.group(1))
@ -1283,13 +1372,13 @@ class Markdown:
        else:
            message(CRITICAL, "We've got a problem header!")
-                elif paragraph :
+
    def _processParagraph(self, parent_elem, paragraph, inList, looseList):
        list = self._handleInlineWrapper("\n".join(paragraph))
        if ( parent_elem.nodeName == 'li'
                and not (looseList or parent_elem.childNodes)):
                        #and not parent_elem.childNodes) :
            # If this is the first paragraph inside "li", don't
            # put <p> around it - append the paragraph bits directly
            # onto parent_elem
@ -1302,10 +1391,6 @@ class Markdown:
        for item in list:
            el.appendChild(item)
            if lines and not lines[0].strip():
                lines = lines[1:]  # skip the first (blank) line
    def _processUList(self, parent_elem, lines, inList):
        self._processList(parent_elem, lines, inList,
@ -1428,15 +1513,22 @@ class Markdown:
        dequoted = []
        i = 0
        blank_line = False # allow one blank line between paragraphs
        for line in lines:
            m = RE.regExp['quoted'].match(line)
            if m:
                dequoted.append(m.group(1))
                i += 1
                blank_line = False
            elif not blank_line and line.strip() != '':
                dequoted.append(line)
                i += 1
            elif not blank_line and line.strip() == '':
                dequoted.append(line)
                i += 1
                blank_line = True
            else:
                break
        else :
            i += 1
        blockquote = self.doc.createElement('blockquote')
        parent_elem.appendChild(blockquote)
@ -1471,11 +1563,11 @@ class Markdown:
-    def _handleInlineWrapper (self, line) :
+    def _handleInlineWrapper (self, line, patternIndex=0):
        parts = [line]
-        for pattern in self.inlinePatterns :
+        while patternIndex < len(self.inlinePatterns):
            i = 0
@ -1484,7 +1576,9 @@ class Markdown:
                x = parts[i]
                if isinstance(x, (str, unicode)):
-                    result = self._applyPattern(x, pattern)
+                    result = self._applyPattern(x, \
                                self.inlinePatterns[patternIndex], \
                                patternIndex)
                    if result:
                        i -= 1
@ -1493,6 +1587,7 @@ class Markdown:
                            parts.insert(i+1,y)
                i += 1
            patternIndex += 1
        for i in range(len(parts)):
            x = parts[i]
@ -1521,7 +1616,7 @@ class Markdown:
        return [self.doc.createTextNode(line)]
-    def _applyPattern(self, line, pattern) :
+    def _applyPattern(self, line, pattern, patternIndex=0):
        """ Given a pattern name, this function checks if the line
        fits the pattern, creates the necessary elements, and returns
@ -1555,7 +1650,7 @@ class Markdown:
                for child in node.childNodes:
                    if isinstance(child, TextNode):
-                        result = self._handleInlineWrapper(child.value)
+                        result = self._handleInlineWrapper(child.value, patternIndex+1)
                        if result:
@ -1594,15 +1689,18 @@ class Markdown:
        """Return the document in XHTML format.
        @returns: A serialized XHTML body."""
        #try :
-        if source :
+        if source is not None: #Allow blank string
            self.source = source
        if not self.source:
-            return ""
+            return u""
-        self.source = removeBOM(self.source, self.encoding)
+        try:
            self.source = unicode(self.source)
        except UnicodeDecodeError:
            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii  input.')
            return u""
        for pp in self.textPreprocessors:
            self.source = pp.run(self.source)
@ -1610,22 +1708,8 @@ class Markdown:
        doc = self._transform()
        xml = doc.toxml()
        #finally:
        #    doc.unlink()
-        # Let's stick in all the raw html pieces
+        # Return everything but the top level tag
        for i in range(self.htmlStash.html_counter) :
            html = self.htmlStash.rawHtmlBlocks[i]
            if self.safeMode and html != "<hr />" and html != "<br />":
                html = HTML_REMOVED_TEXT
            xml = xml.replace("<p>%s\n</p>" % (HTML_PLACEHOLDER % i),
                              html + "\n")
            xml = xml.replace(HTML_PLACEHOLDER % i,
                              html)
        # And return everything but the top level tag
        if self.stripTopLevelTags:
            xml = xml.strip()[23:-7] + "\n"
@ -1636,20 +1720,18 @@ class Markdown:
        return (self.docType + xml).strip()
-    __str__ = convert   # deprecated - will be changed in 1.7 to report
+    def __str__(self):
-                        # information about the MD instance
+        ''' Report info about instance. Markdown always returns unicode. '''
        if self.source is None:
            status = 'in which no source text has been assinged.'
        else:
            status = 'which contains %d chars and %d line(s) of source.'%\
                     (len(self.source), self.source.count('\n')+1)
        return 'An instance of "%s" %s'% (self.__class__, status)
-    toString = __str__  # toString() method is deprecated
+    __unicode__ = convert # markdown should always return a unicode string
    def __unicode__(self):
        """Return the document in XHTML format as a Unicode object.
        """
        return str(self)#.decode(self.encoding)
    toUnicode = __unicode__  # deprecated - will be removed in 1.7
@ -1662,11 +1744,10 @@ def markdownFromFile(input = None,
                     message_threshold = CRITICAL,
                     safe = False):
-    global MESSAGE_THRESHOLD
+    global console_hndlr
-    MESSAGE_THRESHOLD = message_threshold
+    console_hndlr.setLevel(message_threshold)
    message(VERBOSE, "input file: %s" % input)
    message(DEBUG, "input file: %s" % input)
    if not encoding:
        encoding = "utf-8"
@ -1675,7 +1756,9 @@ def markdownFromFile(input = None,
    text = input_file.read()
    input_file.close()
-    new_text = markdown(text, extensions, encoding, safe_mode = safe)
+    text = removeBOM(text, encoding)
    new_text = markdown(text, extensions, safe_mode = safe)
    if output:
        output_file = codecs.open(output, "w", encoding=encoding)
@ -1687,10 +1770,9 @@ def markdownFromFile(input = None,
 def markdown(text,
             extensions = [],
             encoding = None,
             safe_mode = False):
-    message(VERBOSE, "in markdown.markdown(), received text:\n%s" % text)
+    message(DEBUG, "in markdown.markdown(), received text:\n%s" % text)
    extension_names = []
    extension_configs = {}
@ -1764,17 +1846,17 @@ def parse_options() :
    parser.add_option("-e", "--encoding", dest="encoding",
                      help="encoding for input and output files",)
    parser.add_option("-q", "--quiet", default = CRITICAL,
-                      action="store_const", const=NONE, dest="verbose",
+                      action="store_const", const=60, dest="verbose",
                      help="suppress all messages")
    parser.add_option("-v", "--verbose",
                      action="store_const", const=INFO, dest="verbose",
                      help="print info messages")
-    parser.add_option("-s", "--safe",
+    parser.add_option("-s", "--safe", dest="safe", default=False,
-                      action="store_const", const=True, dest="safe",
+                      metavar="SAFE_MODE",
-                      help="same mode (strip user's HTML tag)")
+                      help="same mode ('replace', 'remove' or 'escape'  user's HTML tag)")
    parser.add_option("--noisy",
-                      action="store_const", const=VERBOSE, dest="verbose",
+                      action="store_const", const=DEBUG, dest="verbose",
                      help="print debug messages")
    parser.add_option("-x", "--extension", action="append", dest="extensions",
                      help = "load extension EXTENSION", metavar="EXTENSION")
@ -1799,28 +1881,10 @@ def parse_options() :
 def main():
    options = parse_options()
    #if os.access(inFile, os.R_OK):
    if not options:
        sys.exit(0)
    markdownFromFile(**options)
        return 0
-
+    markdownFromFile(**options)
 if __name__ == '__main__':
    """ Run Markdown from the command line. """
-    sys.exit(main())
+    sys.exit(main)