Merge branch 'markdown' of https://github.com/user-none/calibre

TXT Input: Update bundled python markdown library to 2.3.1
2025-07-09 03:04:10 -04:00 · 2013-09-25 07:41:30 +05:30 · 2013-09-25 07:41:30 +05:30 · 31accb5679
commit 31accb5679
parent 0bcee3f721 cc3466eee1
34 changed files with 2671 additions and 1816 deletions
--- a/src/calibre/ebooks/markdown/__init__.py
+++ b/src/calibre/ebooks/markdown/__init__.py
@@ -0,0 +1,443 @@
+"""
+Python Markdown
+===============
+
+Python Markdown converts Markdown to HTML and can be used as a library or
+called from the command line.
+
+## Basic usage as a module:
+
+    import markdown
+    html = markdown.markdown(your_text_string)
+
+See <http://packages.python.org/Markdown/> for more
+information and instructions on how to extend the functionality of
+Python Markdown.  Read that before you try modifying this file.
+
+## Authors and License
+
+Started by [Manfred Stienstra](http://www.dwerg.net/).  Continued and
+maintained  by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan
+Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).
+
+Contact: markdown@freewisdom.org
+
+Copyright 2007-2013 The Python Markdown Project (v. 1.7 and later)
+Copyright 200? Django Software Foundation (OrderedDict implementation)
+Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
+Copyright 2004 Manfred Stienstra (the original version)
+
+License: BSD (see LICENSE for details).
+"""
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from .__version__ import version, version_info
+import re
+import codecs
+import sys
+import logging
+from . import util
+from .preprocessors import build_preprocessors
+from .blockprocessors import build_block_parser
+from .treeprocessors import build_treeprocessors
+from .inlinepatterns import build_inlinepatterns
+from .postprocessors import build_postprocessors
+from .extensions import Extension
+from .serializers import to_html_string, to_xhtml_string
+
+__all__ = ['Markdown', 'markdown', 'markdownFromFile']
+
+logger = logging.getLogger('MARKDOWN')
+
+
+class Markdown(object):
+    """Convert Markdown to HTML."""
+
+    doc_tag = "div"     # Element used to wrap document - later removed
+
+    option_defaults = {
+        'html_replacement_text' : '[HTML_REMOVED]',
+        'tab_length'            : 4,
+        'enable_attributes'     : True,
+        'smart_emphasis'        : True,
+        'lazy_ol'               : True,
+    }
+
+    output_formats = {
+        'html'  : to_html_string,
+        'html4' : to_html_string,
+        'html5' : to_html_string,
+        'xhtml' : to_xhtml_string,
+        'xhtml1': to_xhtml_string,
+        'xhtml5': to_xhtml_string,
+    }
+
+    ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
+                    '(', ')', '>', '#', '+', '-', '.', '!']
+
+    def __init__(self, *args, **kwargs):
+        """
+        Creates a new Markdown instance.
+
+        Keyword arguments:
+
+        * extensions: A list of extensions.
+           If they are of type string, the module mdx_name.py will be loaded.
+           If they are a subclass of markdown.Extension, they will be used
+           as-is.
+        * extension_configs: Configuration settings for extensions.
+        * output_format: Format of output. Supported formats are:
+            * "xhtml1": Outputs XHTML 1.x. Default.
+            * "xhtml5": Outputs XHTML style tags of HTML 5
+            * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
+            * "html4": Outputs HTML 4
+            * "html5": Outputs HTML style tags of HTML 5
+            * "html": Outputs latest supported version of HTML (currently HTML 4).
+            Note that it is suggested that the more specific formats ("xhtml1"
+            and "html4") be used as "xhtml" or "html" may change in the future
+            if it makes sense at that time.
+        * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
+        * html_replacement_text: Text used when safe_mode is set to "replace".
+        * tab_length: Length of tabs in the source. Default: 4
+        * enable_attributes: Enable the conversion of attributes. Default: True
+        * smart_emphasis: Treat `_connected_words_` intelligently. Default: True
+        * lazy_ol: Ignore number of first item of ordered lists. Default: True
+
+        """
+
+        # For backward compatibility, loop through old positional args
+        pos = ['extensions', 'extension_configs', 'safe_mode', 'output_format']
+        c = 0
+        for arg in args:
+            if pos[c] not in kwargs:
+                kwargs[pos[c]] = arg
+            c += 1
+            if c == len(pos):
+                # ignore any additional args
+                break
+
+        # Loop through kwargs and assign defaults
+        for option, default in self.option_defaults.items():
+            setattr(self, option, kwargs.get(option, default))
+
+        self.safeMode = kwargs.get('safe_mode', False)
+        if self.safeMode and 'enable_attributes' not in kwargs:
+            # Disable attributes in safeMode when not explicitly set
+            self.enable_attributes = False
+
+        self.registeredExtensions = []
+        self.docType = ""
+        self.stripTopLevelTags = True
+
+        self.build_parser()
+
+        self.references = {}
+        self.htmlStash = util.HtmlStash()
+        self.set_output_format(kwargs.get('output_format', 'xhtml1'))
+        self.registerExtensions(extensions=kwargs.get('extensions', []),
+                                configs=kwargs.get('extension_configs', {}))
+        self.reset()
+
+    def build_parser(self):
+        """ Build the parser from the various parts. """
+        self.preprocessors = build_preprocessors(self)
+        self.parser = build_block_parser(self)
+        self.inlinePatterns = build_inlinepatterns(self)
+        self.treeprocessors = build_treeprocessors(self)
+        self.postprocessors = build_postprocessors(self)
+        return self
+
+    def registerExtensions(self, extensions, configs):
+        """
+        Register extensions with this instance of Markdown.
+
+        Keyword arguments:
+
+        * extensions: A list of extensions, which can either
+           be strings or objects.  See the docstring on Markdown.
+        * configs: A dictionary mapping module names to config options.
+
+        """
+        for ext in extensions:
+            if isinstance(ext, util.string_type):
+                ext = self.build_extension(ext, configs.get(ext, []))
+            if isinstance(ext, Extension):
+                ext.extendMarkdown(self, globals())
+            elif ext is not None:
+                raise TypeError(
+                    'Extension "%s.%s" must be of type: "markdown.Extension"'
+                    % (ext.__class__.__module__, ext.__class__.__name__))
+
+        return self
+
+    def build_extension(self, ext_name, configs = []):
+        """Load an extension module by name and return the extension it builds.
+
+        The extension name may contain arguments as part of the string in the
+        following format: "extname(key1=value1,key2=value2)"
+
+        """
+
+        # Parse extensions config params (ignore the order)
+        configs = dict(configs)
+        pos = ext_name.find("(") # find the first "("
+        if pos > 0:
+            ext_args = ext_name[pos+1:-1]
+            ext_name = ext_name[:pos]
+            pairs = [x.split("=") for x in ext_args.split(",")]
+            configs.update([(x.strip(), y.strip()) for (x, y) in pairs])
+
+        # Setup the module name
+        module_name = ext_name
+        if '.' not in ext_name:
+            module_name = '.'.join(['calibre.ebooks.markdown.extensions', ext_name])
+
+        # Try loading the extension first from one place, then another
+        try: # New style (markdown.extensions.<extension>)
+            module = __import__(module_name, {}, {}, [module_name.rpartition('.')[0]])
+        except ImportError:
+            module_name_old_style = '_'.join(['mdx', ext_name])
+            try: # Old style (mdx_<extension>)
+                module = __import__(module_name_old_style)
+            except ImportError as e:
+                message = "Failed loading extension '%s' from '%s' or '%s'" \
+                    % (ext_name, module_name, module_name_old_style)
+                e.args = (message,) + e.args[1:]
+                raise
+
+        # If the module is loaded successfully, we expect it to define a
+        # function called makeExtension()
+        try:
+            return module.makeExtension(configs.items())
+        except AttributeError as e:
+            message = e.args[0]
+            message = "Failed to initiate extension " \
+                      "'%s': %s" % (ext_name, message)
+            e.args = (message,) + e.args[1:]
+            raise
+
+    def registerExtension(self, extension):
+        """ This gets called by the extension """
+        self.registeredExtensions.append(extension)
+        return self
+
+    def reset(self):
+        """
+        Resets all state variables so that we can start with a new text.
+        """
+        self.htmlStash.reset()
+        self.references.clear()
+
+        for extension in self.registeredExtensions:
+            if hasattr(extension, 'reset'):
+                extension.reset()
+
+        return self
+
+    def set_output_format(self, format):
+        """ Set the output format for the class instance. """
+        self.output_format = format.lower()
+        try:
+            self.serializer = self.output_formats[self.output_format]
+        except KeyError as e:
+            valid_formats = list(self.output_formats.keys())
+            valid_formats.sort()
+            message = 'Invalid Output Format: "%s". Use one of %s.' \
+                       % (self.output_format, 
+                          '"' + '", "'.join(valid_formats) + '"')
+            e.args = (message,) + e.args[1:]
+            raise
+        return self
+
+    def convert(self, source):
+        """
+        Convert markdown to serialized XHTML or HTML.
+
+        Keyword arguments:
+
+        * source: Source text as a Unicode string.
+
+        Markdown processing takes place in five steps:
+
+        1. A bunch of "preprocessors" munge the input text.
+        2. BlockParser() parses the high-level structural elements of the
+           pre-processed text into an ElementTree.
+        3. A bunch of "treeprocessors" are run against the ElementTree. One
+           such treeprocessor runs InlinePatterns against the ElementTree,
+           detecting inline markup.
+        4. Some post-processors are run against the text after the ElementTree
+           has been serialized into text.
+        5. The output is written to a string.
+
+        """
+
+        # Fixup the source text
+        if not source.strip():
+            return ''  # a blank unicode string
+
+        try:
+            source = util.text_type(source)
+        except UnicodeDecodeError as e:
+            # Customise error message while maintaining original traceback
+            e.reason += '. -- Note: Markdown only accepts unicode input!'
+            raise
+
+        # Split into lines and run the line preprocessors.
+        self.lines = source.split("\n")
+        for prep in self.preprocessors.values():
+            self.lines = prep.run(self.lines)
+
+        # Parse the high-level elements.
+        root = self.parser.parseDocument(self.lines).getroot()
+
+        # Run the tree-processors
+        for treeprocessor in self.treeprocessors.values():
+            newRoot = treeprocessor.run(root)
+            if newRoot:
+                root = newRoot
+
+        # Serialize _properly_.  Strip top-level tags.
+        output = self.serializer(root)
+        if self.stripTopLevelTags:
+            try:
+                start = output.index('<%s>'%self.doc_tag)+len(self.doc_tag)+2
+                end = output.rindex('</%s>'%self.doc_tag)
+                output = output[start:end].strip()
+            except ValueError:
+                if output.strip().endswith('<%s />'%self.doc_tag):
+                    # We have an empty document
+                    output = ''
+                else:
+                    # We have a serious problem
+                    raise ValueError('Markdown failed to strip top-level tags. Document=%r' % output.strip())
+
+        # Run the text post-processors
+        for pp in self.postprocessors.values():
+            output = pp.run(output)
+
+        return output.strip()
+
+    def convertFile(self, input=None, output=None, encoding=None):
+        """Converts a markdown file and returns the HTML as a unicode string.
+
+        Decodes the file using the provided encoding (defaults to utf-8),
+        passes the file content to markdown, and outputs the html to either
+        the provided stream or the file with provided name, using the same
+        encoding as the source file. The 'xmlcharrefreplace' error handler is
+        used when encoding the output.
+
+        **Note:** This is the only place that decoding and encoding of unicode
+        takes place in Python-Markdown.  (All other code is unicode-in /
+        unicode-out.)
+
+        Keyword arguments:
+
+        * input: File object or path. Reads from stdin if `None`.
+        * output: File object or path. Writes to stdout if `None`.
+        * encoding: Encoding of input and output files. Defaults to utf-8.
+
+        """
+
+        encoding = encoding or "utf-8"
+
+        # Read the source
+        if input:
+            if isinstance(input, util.string_type):
+                input_file = codecs.open(input, mode="r", encoding=encoding)
+            else:
+                input_file = codecs.getreader(encoding)(input)
+            text = input_file.read()
+            input_file.close()
+        else:
+            text = sys.stdin.read()
+            if not isinstance(text, util.text_type):
+                text = text.decode(encoding)
+
+        text = text.lstrip('\ufeff') # remove the byte-order mark
+
+        # Convert
+        html = self.convert(text)
+
+        # Write to file or stdout
+        if output:
+            if isinstance(output, util.string_type):
+                output_file = codecs.open(output, "w",
+                                          encoding=encoding,
+                                          errors="xmlcharrefreplace")
+                output_file.write(html)
+                output_file.close()
+            else:
+                writer = codecs.getwriter(encoding)
+                output_file = writer(output, errors="xmlcharrefreplace")
+                output_file.write(html)
+                # Don't close here. User may want to write more.
+        else:
+            # Encode manually and write bytes to stdout. 
+            html = html.encode(encoding, "xmlcharrefreplace")
+            try:
+                # Write bytes directly to buffer (Python 3).
+                sys.stdout.buffer.write(html)
+            except AttributeError:
+                # Probably Python 2, which works with bytes by default.
+                sys.stdout.write(html)
+
+        return self
+
+
+"""
+EXPORTED FUNCTIONS
+=============================================================================
+
+Those are the two functions we really mean to export: markdown() and
+markdownFromFile().
+"""
+
+def markdown(text, *args, **kwargs):
+    """Convert a markdown string to HTML and return HTML as a unicode string.
+
+    This is a shortcut function for `Markdown` class to cover the most
+    basic use case.  It initializes an instance of Markdown, loads the
+    necessary extensions and runs the parser on the given text.
+
+    Keyword arguments:
+
+    * text: Markdown formatted text as Unicode or ASCII string.
+    * Any arguments accepted by the Markdown class.
+
+    Returns: An HTML document as a string.
+
+    """
+    md = Markdown(*args, **kwargs)
+    return md.convert(text)
+
+
+def markdownFromFile(*args, **kwargs):
+    """Read markdown code from a file and write it to a file or a stream.
+
+    This is a shortcut function which initializes an instance of Markdown,
+    and calls the convertFile method rather than convert.
+
+    Keyword arguments:
+
+    * input: a file name or readable object.
+    * output: a file name or writable object.
+    * encoding: Encoding of input and output.
+    * Any arguments accepted by the Markdown class.
+
+    """
+    # For backward compatibility loop through positional args
+    pos = ['input', 'output', 'extensions', 'encoding']
+    c = 0
+    for arg in args:
+        if pos[c] not in kwargs:
+            kwargs[pos[c]] = arg
+        c += 1
+        if c == len(pos):
+            break
+
+    md = Markdown(**kwargs)
+    md.convertFile(kwargs.get('input', None),
+                   kwargs.get('output', None),
+                   kwargs.get('encoding', None))
+
--- a/src/calibre/ebooks/markdown/__main__.py
+++ b/src/calibre/ebooks/markdown/__main__.py
@@ -0,0 +1,87 @@
+"""
+COMMAND-LINE SPECIFIC STUFF
+=============================================================================
+
+"""
+
+import markdown
+import sys
+import optparse
+
+import logging
+from logging import DEBUG, INFO, CRITICAL
+
+logger =  logging.getLogger('MARKDOWN')
+
+def parse_options():
+    """
+    Define and parse `optparse` options for command-line usage.
+    """
+    usage = """%prog [options] [INPUTFILE]
+       (STDIN is assumed if no INPUTFILE is given)"""
+    desc = "A Python implementation of John Gruber's Markdown. " \
+           "http://packages.python.org/Markdown/"
+    ver = "%%prog %s" % markdown.version
+    
+    parser = optparse.OptionParser(usage=usage, description=desc, version=ver)
+    parser.add_option("-f", "--file", dest="filename", default=None,
+                      help="Write output to OUTPUT_FILE. Defaults to STDOUT.",
+                      metavar="OUTPUT_FILE")
+    parser.add_option("-e", "--encoding", dest="encoding",
+                      help="Encoding for input and output files.",)
+    parser.add_option("-q", "--quiet", default = CRITICAL,
+                      action="store_const", const=CRITICAL+10, dest="verbose",
+                      help="Suppress all warnings.")
+    parser.add_option("-v", "--verbose",
+                      action="store_const", const=INFO, dest="verbose",
+                      help="Print all warnings.")
+    parser.add_option("-s", "--safe", dest="safe", default=False,
+                      metavar="SAFE_MODE",
+                      help="'replace', 'remove' or 'escape' HTML tags in input")
+    parser.add_option("-o", "--output_format", dest="output_format", 
+                      default='xhtml1', metavar="OUTPUT_FORMAT",
+                      help="'xhtml1' (default), 'html4' or 'html5'.")
+    parser.add_option("--noisy",
+                      action="store_const", const=DEBUG, dest="verbose",
+                      help="Print debug messages.")
+    parser.add_option("-x", "--extension", action="append", dest="extensions",
+                      help = "Load extension EXTENSION.", metavar="EXTENSION")
+    parser.add_option("-n", "--no_lazy_ol", dest="lazy_ol", 
+                      action='store_false', default=True,
+                      help="Observe number of first item of ordered lists.")
+
+    (options, args) = parser.parse_args()
+
+    if len(args) == 0:
+        input_file = None
+    else:
+        input_file = args[0]
+
+    if not options.extensions:
+        options.extensions = []
+
+    return {'input': input_file,
+            'output': options.filename,
+            'safe_mode': options.safe,
+            'extensions': options.extensions,
+            'encoding': options.encoding,
+            'output_format': options.output_format,
+            'lazy_ol': options.lazy_ol}, options.verbose
+
+def run():
+    """Run Markdown from the command line."""
+
+    # Parse options and adjust logging level if necessary
+    options, logging_level = parse_options()
+    if not options: sys.exit(2)
+    logger.setLevel(logging_level)
+    logger.addHandler(logging.StreamHandler())
+
+    # Run
+    markdown.markdownFromFile(**options)
+
+if __name__ == '__main__':
+    # Support running module as a commandline command. 
+    # Python 2.5 & 2.6 do: `python -m markdown.__main__ [options] [args]`.
+    # Python 2.7 & 3.x do: `python -m markdown [options] [args]`.
+    run()
--- a/src/calibre/ebooks/markdown/__version__.py
+++ b/src/calibre/ebooks/markdown/__version__.py
@@ -0,0 +1,28 @@
+#
+# markdown/__version__.py
+#
+# version_info should conform to PEP 386 
+# (major, minor, micro, alpha/beta/rc/final, #)
+# (1, 1, 2, 'alpha', 0) => "1.1.2.dev"
+# (1, 2, 0, 'beta', 2) => "1.2b2"
+version_info = (2, 3, 1, 'final', 0)
+
+def _get_version():
+    " Returns a PEP 386-compliant version number from version_info. "
+    assert len(version_info) == 5
+    assert version_info[3] in ('alpha', 'beta', 'rc', 'final')
+
+    parts = 2 if version_info[2] == 0 else 3
+    main = '.'.join(map(str, version_info[:parts]))
+
+    sub = ''
+    if version_info[3] == 'alpha' and version_info[4] == 0:
+        # TODO: maybe append some sort of git info here??
+        sub = '.dev'
+    elif version_info[3] != 'final':
+        mapping = {'alpha': 'a', 'beta': 'b', 'rc': 'c'}
+        sub = mapping[version_info[3]] + str(version_info[4])
+
+    return str(main + sub)
+
+version = _get_version()
--- a/src/calibre/ebooks/markdown/blockparser.py
+++ b/src/calibre/ebooks/markdown/blockparser.py
@@ -1,5 +1,7 @@
-
-import markdown
+from __future__ import unicode_literals
+from __future__ import absolute_import
+from . import util
+from . import odict

 class State(list):
    """ Track the current and nested state of the parser. 
@@ -41,9 +43,10 @@ class BlockParser:
    looping through them and creating an ElementTree object.
    """

-    def __init__(self):
-        self.blockprocessors = markdown.odict.OrderedDict()
+    def __init__(self, markdown):
+        self.blockprocessors = odict.OrderedDict()
        self.state = State()
+        self.markdown = markdown

    def parseDocument(self, lines):
        """ Parse a markdown document into an ElementTree. 
@@ -56,9 +59,9 @@ class BlockParser:

        """
        # Create a ElementTree from the lines
-        self.root = markdown.etree.Element(markdown.DOC_TAG)
+        self.root = util.etree.Element(self.markdown.doc_tag)
        self.parseChunk(self.root, '\n'.join(lines))
-        return markdown.etree.ElementTree(self.root)
+        return util.etree.ElementTree(self.root)

    def parseChunk(self, parent, text):
        """ Parse a chunk of markdown text and attach to given etree node. 
@@ -87,9 +90,10 @@ class BlockParser:

        """
        while blocks:
-           for processor in self.blockprocessors.values():
-               if processor.test(parent, blocks[0]):
-                   processor.run(parent, blocks)
-                   break
+            for processor in self.blockprocessors.values():
+                if processor.test(parent, blocks[0]):
+                    if processor.run(parent, blocks) is not False:
+                        # run returns True or None
+                        break


--- a/src/calibre/ebooks/markdown/blockprocessors.py
+++ b/src/calibre/ebooks/markdown/blockprocessors.py
@@ -1,6 +1,6 @@
 """
 CORE MARKDOWN BLOCKPARSER
-=============================================================================
+===========================================================================

 This parser handles basic parsing of Markdown blocks.  It doesn't concern itself
 with inline elements such as **bold** or *italics*, but rather just catches
@@ -9,11 +9,34 @@ blocks, lists, quotes, etc.
 The BlockParser is made up of a bunch of BlockProssors, each handling a
 different type of block. Extensions may add/replace/remove BlockProcessors
 as they need to alter how markdown blocks are parsed.
-
 """

+from __future__ import absolute_import
+from __future__ import division
+from __future__ import unicode_literals
+import logging
 import re
-import markdown
+from . import util
+from .blockparser import BlockParser
+
+logger =  logging.getLogger('MARKDOWN')
+
+
+def build_block_parser(md_instance, **kwargs):
+    """ Build the default block parser used by Markdown. """
+    parser = BlockParser(md_instance)
+    parser.blockprocessors['empty'] = EmptyBlockProcessor(parser)
+    parser.blockprocessors['indent'] = ListIndentProcessor(parser)
+    parser.blockprocessors['code'] = CodeBlockProcessor(parser)
+    parser.blockprocessors['hashheader'] = HashHeaderProcessor(parser)
+    parser.blockprocessors['setextheader'] = SetextHeaderProcessor(parser)
+    parser.blockprocessors['hr'] = HRProcessor(parser)
+    parser.blockprocessors['olist'] = OListProcessor(parser)
+    parser.blockprocessors['ulist'] = UListProcessor(parser)
+    parser.blockprocessors['quote'] = BlockQuoteProcessor(parser)
+    parser.blockprocessors['paragraph'] = ParagraphProcessor(parser)
+    return parser
+

 class BlockProcessor:
    """ Base class for block processors. 
@@ -26,8 +49,9 @@ class BlockProcessor:

    """

-    def __init__(self, parser=None):
+    def __init__(self, parser):
        self.parser = parser
+        self.tab_length = parser.markdown.tab_length

    def lastChild(self, parent):
        """ Return the last child of an etree element. """
@@ -41,8 +65,8 @@ class BlockProcessor:
        newtext = []
        lines = text.split('\n')
        for line in lines:
-            if line.startswith(' '*markdown.TAB_LENGTH):
-                newtext.append(line[markdown.TAB_LENGTH:])
+            if line.startswith(' '*self.tab_length):
+                newtext.append(line[self.tab_length:])
            elif not line.strip():
                newtext.append('')
            else:
@@ -53,8 +77,8 @@ class BlockProcessor:
        """ Remove a tab from front of lines but allowing dedented lines. """
        lines = text.split('\n')
        for i in range(len(lines)):
-            if lines[i].startswith(' '*markdown.TAB_LENGTH*level):
-                lines[i] = lines[i][markdown.TAB_LENGTH*level:]
+            if lines[i].startswith(' '*self.tab_length*level):
+                lines[i] = lines[i][self.tab_length*level:]
        return '\n'.join(lines)

    def test(self, parent, block):
@@ -113,12 +137,15 @@ class ListIndentProcessor(BlockProcessor):

    """

-    INDENT_RE = re.compile(r'^(([ ]{%s})+)'% markdown.TAB_LENGTH)
    ITEM_TYPES = ['li']
    LIST_TYPES = ['ul', 'ol']

+    def __init__(self, *args):
+        BlockProcessor.__init__(self, *args)
+        self.INDENT_RE = re.compile(r'^(([ ]{%s})+)'% self.tab_length)
+
    def test(self, parent, block):
-        return block.startswith(' '*markdown.TAB_LENGTH) and \
+        return block.startswith(' '*self.tab_length) and \
                not self.parser.state.isstate('detabbed') and  \
                (parent.tag in self.ITEM_TYPES or \
                    (len(parent) and parent[-1] and \
@@ -133,8 +160,16 @@ class ListIndentProcessor(BlockProcessor):

        self.parser.state.set('detabbed')
        if parent.tag in self.ITEM_TYPES:
-            # The parent is already a li. Just parse the child block.
-            self.parser.parseBlocks(parent, [block])
+            # It's possible that this parent has a 'ul' or 'ol' child list
+            # with a member.  If that is the case, then that should be the
+            # parent.  This is intended to catch the edge case of an indented
+            # list whose first member was parsed prior to this point;
+            # see OListProcessor.
+            if len(parent) and parent[-1].tag in self.LIST_TYPES:
+                self.parser.parseBlocks(parent[-1], [block])
+            else:
+                # The parent is already a li. Just parse the child block.
+                self.parser.parseBlocks(parent, [block])
        elif sibling.tag in self.ITEM_TYPES:
            # The sibling is a li. Use it as parent.
            self.parser.parseBlocks(sibling, [block])
@@ -143,8 +178,12 @@ class ListIndentProcessor(BlockProcessor):
            # Assume the last child li is the parent of this block.
            if sibling[-1].text:
                # If the parent li has text, that text needs to be moved to a p
-                block = '%s\n\n%s' % (sibling[-1].text, block)
+                # The p must be 'inserted' at the beginning of the list item in
+                # the event that other children already exist, i.e., a nested sublist.
+                p = util.etree.Element('p')
+                p.text = sibling[-1].text
                sibling[-1].text = ''
+                sibling[-1].insert(0, p)
            self.parser.parseChunk(sibling[-1], block)
        else:
            self.create_item(sibling, block)
@@ -152,7 +191,7 @@ class ListIndentProcessor(BlockProcessor):

    def create_item(self, parent, block):
        """ Create a new li and parse the block with it as the parent. """
-        li = markdown.etree.SubElement(parent, 'li')
+        li = util.etree.SubElement(parent, 'li')
        self.parser.parseBlocks(li, [block])
 
    def get_level(self, parent, block):
@@ -160,7 +199,7 @@ class ListIndentProcessor(BlockProcessor):
        # Get indent level
        m = self.INDENT_RE.match(block)
        if m:
-            indent_level = len(m.group(1))/markdown.TAB_LENGTH
+            indent_level = len(m.group(1))/self.tab_length
        else:
            indent_level = 0
        if self.parser.state.isstate('list'):
@@ -187,7 +226,7 @@ class CodeBlockProcessor(BlockProcessor):
    """ Process code blocks. """

    def test(self, parent, block):
-        return block.startswith(' '*markdown.TAB_LENGTH)
+        return block.startswith(' '*self.tab_length)
    
    def run(self, parent, blocks):
        sibling = self.lastChild(parent)
@@ -200,13 +239,13 @@ class CodeBlockProcessor(BlockProcessor):
            # linebreaks removed from the split into a list.
            code = sibling[0]
            block, theRest = self.detab(block)
-            code.text = markdown.AtomicString('%s\n%s\n' % (code.text, block.rstrip()))
+            code.text = util.AtomicString('%s\n%s\n' % (code.text, block.rstrip()))
        else:
            # This is a new codeblock. Create the elements and insert text.
-            pre = markdown.etree.SubElement(parent, 'pre')
-            code = markdown.etree.SubElement(pre, 'code')
+            pre = util.etree.SubElement(parent, 'pre')
+            code = util.etree.SubElement(pre, 'code')
            block, theRest = self.detab(block)
-            code.text = markdown.AtomicString('%s\n' % block.rstrip())
+            code.text = util.AtomicString('%s\n' % block.rstrip())
        if theRest:
            # This block contained unindented line(s) after the first indented 
            # line. Insert these lines as the first block of the master blocks
@@ -237,9 +276,12 @@ class BlockQuoteProcessor(BlockProcessor):
            quote = sibling
        else:
            # This is a new blockquote. Create a new parent element.
-            quote = markdown.etree.SubElement(parent, 'blockquote')
+            quote = util.etree.SubElement(parent, 'blockquote')
        # Recursively parse block with blockquote as parent.
+        # change parser state so blockquotes embedded in lists use p tags
+        self.parser.state.set('blockquote')
        self.parser.parseChunk(quote, block)
+        self.parser.state.reset()

    def clean(self, line):
        """ Remove ``>`` from beginning of a line. """
@ -256,11 +298,18 @@ class OListProcessor(BlockProcessor):

    TAG = 'ol'
    # Detect an item (``1. item``). ``group(1)`` contains contents of item.
-    RE = re.compile(r'^[ ]{0,3}\d+\.[ ](.*)')
+    RE = re.compile(r'^[ ]{0,3}\d+\.[ ]+(.*)')
    # Detect items on secondary lines. they can be of either list type.
-    CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ](.*)')
+    CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.)|[*+-])[ ]+(.*)')
    # Detect indented (nested) items of either type
-    INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ].*')
+    INDENT_RE = re.compile(r'^[ ]{4,7}((\d+\.)|[*+-])[ ]+.*')
+    # The integer (python string) with which the list starts (default=1)
+    # Eg: If the list is initialized as
+    #   3. Item
+    # The ol tag will get a start="3" attribute (unless lazy_ol is set)
+    STARTSWITH = '1'
+    # List of allowed sibling tags. 
+    SIBLING_TAGS = ['ol', 'ul']

    def test(self, parent, block):
        return bool(self.RE.match(block))
@ -269,33 +318,58 @@ class OListProcessor(BlockProcessor):
        # Check fr multiple items in one block.
        items = self.get_items(blocks.pop(0))
        sibling = self.lastChild(parent)
-        if sibling and sibling.tag in ['ol', 'ul']:
+
+        if sibling and sibling.tag in self.SIBLING_TAGS:
            # Previous block was a list item, so set that as parent
            lst = sibling
-            # make sure previous item is in a p.
-            if len(lst) and lst[-1].text and not len(lst[-1]):
-                p = markdown.etree.SubElement(lst[-1], 'p')
+            # make sure previous item is in a p - if the item has text, then
+            # it isn't in a p
+            if lst[-1].text: 
+                # since it's possible there are other children for this sibling,
+                # we can't just SubElement the p, we need to insert it as the 
+                # first item
+                p = util.etree.Element('p')
                p.text = lst[-1].text
                lst[-1].text = ''
+                lst[-1].insert(0, p)
+            # if the last item has a tail, then the tail needs to be put in a p
+            # likely only when a header is not followed by a blank line
+            lch = self.lastChild(lst[-1])
+            if lch is not None and lch.tail:
+                p = util.etree.SubElement(lst[-1], 'p')
+                p.text = lch.tail.lstrip()
+                lch.tail = ''
+
            # parse first block differently as it gets wrapped in a p.
-            li = markdown.etree.SubElement(lst, 'li')
+            li = util.etree.SubElement(lst, 'li')
            self.parser.state.set('looselist')
            firstitem = items.pop(0)
            self.parser.parseBlocks(li, [firstitem])
            self.parser.state.reset()
+        elif parent.tag in ['ol', 'ul']:
+            # this catches the edge case of a multi-item indented list whose 
+            # first item is in a blank parent-list item:
+            # * * subitem1
+            #     * subitem2
+            # see also ListIndentProcessor
+            lst = parent
        else:
            # This is a new list so create parent with appropriate tag.
-            lst = markdown.etree.SubElement(parent, self.TAG)
+            lst = util.etree.SubElement(parent, self.TAG)
+            # Check if a custom start integer is set
+            if not self.parser.markdown.lazy_ol and self.STARTSWITH !='1':
+                lst.attrib['start'] = self.STARTSWITH
+
        self.parser.state.set('list')
        # Loop through items in block, recursively parsing each with the
        # appropriate parent.
        for item in items:
-            if item.startswith(' '*markdown.TAB_LENGTH):
+            if item.startswith(' '*self.tab_length):
                # Item is indented. Parse with last item as parent
                self.parser.parseBlocks(lst[-1], [item])
            else:
                # New item. Create li and parse with it as parent
-                li = markdown.etree.SubElement(lst, 'li')
+                li = util.etree.SubElement(lst, 'li')
                self.parser.parseBlocks(li, [item])
        self.parser.state.reset()

@ -305,11 +379,17 @@ class OListProcessor(BlockProcessor):
        for line in block.split('\n'):
            m = self.CHILD_RE.match(line)
            if m:
-                # This is a new item. Append
+                # This is a new list item
+                # Check first item for the start index
+                if not items and self.TAG=='ol':
+                    # Detect the integer value of first list item
+                    INTEGER_RE = re.compile('(\d+)')
+                    self.STARTSWITH = INTEGER_RE.match(m.group(1)).group()
+                # Append to the list
                items.append(m.group(3))
            elif self.INDENT_RE.match(line):
                # This is an indented (possibly nested) item.
-                if items[-1].startswith(' '*markdown.TAB_LENGTH):
+                if items[-1].startswith(' '*self.tab_length):
                    # Previous item was indented. Append to that item.
                    items[-1] = '%s\n%s' % (items[-1], line)
                else:
@ -324,7 +404,7 @@ class UListProcessor(OListProcessor):
    """ Process unordered list blocks. """

    TAG = 'ul'
-    RE = re.compile(r'^[ ]{0,3}[*+-][ ](.*)')
+    RE = re.compile(r'^[ ]{0,3}[*+-][ ]+(.*)')


 class HashHeaderProcessor(BlockProcessor):
@ -348,21 +428,21 @@ class HashHeaderProcessor(BlockProcessor):
                # recursively parse this lines as a block.
                self.parser.parseBlocks(parent, [before])
            # Create header using named groups from RE
-            h = markdown.etree.SubElement(parent, 'h%d' % len(m.group('level')))
+            h = util.etree.SubElement(parent, 'h%d' % len(m.group('level')))
            h.text = m.group('header').strip()
            if after:
                # Insert remaining lines as first block for future parsing.
                blocks.insert(0, after)
        else:
            # This should never happen, but just in case...
-            print("We've got a problem header!")
+            logger.warn("We've got a problem header: %r" % block)


 class SetextHeaderProcessor(BlockProcessor):
    """ Process Setext-style Headers. """

    # Detect Setext-style header. Must be first 2 lines of block.
-    RE = re.compile(r'^.*?\n[=-]{3,}', re.MULTILINE)
+    RE = re.compile(r'^.*?\n[=-]+[ ]*(\n|$)', re.MULTILINE)

    def test(self, parent, block):
        return bool(self.RE.match(block))
@ -374,7 +454,7 @@ class SetextHeaderProcessor(BlockProcessor):
            level = 1
        else:
            level = 2
-        h = markdown.etree.SubElement(parent, 'h%d' % level)
+        h = util.etree.SubElement(parent, 'h%d' % level)
        h.text = lines[0].strip()
        if len(lines) > 2:
            # Block contains additional lines. Add to  master blocks for later.
@ -384,58 +464,60 @@ class SetextHeaderProcessor(BlockProcessor):
 class HRProcessor(BlockProcessor):
    """ Process Horizontal Rules. """

-    RE = r'[ ]{0,3}(?P<ch>[*_-])[ ]?((?P=ch)[ ]?){2,}[ ]*'
+    RE = r'^[ ]{0,3}((-+[ ]{0,2}){3,}|(_+[ ]{0,2}){3,}|(\*+[ ]{0,2}){3,})[ ]*'
    # Detect hr on any line of a block.
-    SEARCH_RE = re.compile(r'(^|\n)%s(\n|$)' % RE)
-    # Match a hr on a single line of text.
-    MATCH_RE = re.compile(r'^%s$' % RE)
+    SEARCH_RE = re.compile(RE, re.MULTILINE)

    def test(self, parent, block):
-        return bool(self.SEARCH_RE.search(block))
-
-    def run(self, parent, blocks):
-        lines = blocks.pop(0).split('\n')
-        prelines = []
-        # Check for lines in block before hr.
-        for line in lines:
-            m = self.MATCH_RE.match(line)
-            if m:
-                break
-            else:
-                prelines.append(line)
-        if len(prelines):
-            # Recursively parse lines before hr so they get parsed first.
-            self.parser.parseBlocks(parent, ['\n'.join(prelines)])
-        # create hr
-        markdown.etree.SubElement(parent, 'hr')
-        # check for lines in block after hr.
-        lines = lines[len(prelines)+1:]
-        if len(lines):
-            # Add lines after hr to master blocks for later parsing.
-            blocks.insert(0, '\n'.join(lines))
-
-
-class EmptyBlockProcessor(BlockProcessor):
-    """ Process blocks and start with an empty line. """
-
-    # Detect a block that only contains whitespace
-    # or only whitespace on the first line.
-    RE = re.compile(r'^\s*\n')
-
-    def test(self, parent, block):
-        return bool(self.RE.match(block))
+        m = self.SEARCH_RE.search(block)
+        # No atomic grouping in python so we simulate it here for performance.
+        # The regex only matches what would be in the atomic group - the HR.
+        # Then check if we are at end of block or if next char is a newline.
+        if m and (m.end() == len(block) or block[m.end()] == '\n'):
+            # Save match object on class instance so we can use it later.
+            self.match = m
+            return True
+        return False

    def run(self, parent, blocks):
        block = blocks.pop(0)
-        m = self.RE.match(block)
-        if m:
-            # Add remaining line to master blocks for later.
-            blocks.insert(0, block[m.end():])
-            sibling = self.lastChild(parent)
-            if sibling and sibling.tag == 'pre' and sibling[0] and \
-                    sibling[0].tag == 'code':
-                # Last block is a codeblock. Append to preserve whitespace.
-                sibling[0].text = markdown.AtomicString('%s/n/n/n' % sibling[0].text )
+        # Check for lines in block before hr.
+        prelines = block[:self.match.start()].rstrip('\n')
+        if prelines:
+            # Recursively parse lines before hr so they get parsed first.
+            self.parser.parseBlocks(parent, [prelines])
+        # create hr
+        util.etree.SubElement(parent, 'hr')
+        # check for lines in block after hr.
+        postlines = block[self.match.end():].lstrip('\n')
+        if postlines:
+            # Add lines after hr to master blocks for later parsing.
+            blocks.insert(0, postlines)
+
+
+
+class EmptyBlockProcessor(BlockProcessor):
+    """ Process blocks that are empty or start with an empty line. """
+
+    def test(self, parent, block):
+        return not block or block.startswith('\n')
+
+    def run(self, parent, blocks):
+        block = blocks.pop(0)
+        filler = '\n\n'
+        if block:
+            # Starts with empty line
+            # Only replace a single line.
+            filler = '\n'
+            # Save the rest for later.
+            theRest = block[1:]
+            if theRest:
+                # Add remaining lines to master blocks for later.
+                blocks.insert(0, theRest)
+        sibling = self.lastChild(parent)
+        if sibling and sibling.tag == 'pre' and len(sibling) and sibling[0].tag == 'code':
+            # Last block is a codeblock. Append to preserve whitespace.
+            sibling[0].text = util.AtomicString('%s%s' % (sibling[0].text, filler))


 class ParagraphProcessor(BlockProcessor):
@ -449,12 +531,28 @@ class ParagraphProcessor(BlockProcessor):
        if block.strip():
            # Not a blank block. Add to parent, otherwise throw it away.
            if self.parser.state.isstate('list'):
-                # The parent is a tight-list. Append to parent.text
-                if parent.text:
-                    parent.text = '%s\n%s' % (parent.text, block)
+                # The parent is a tight-list.
+                #
+                # Check for any children. This will likely only happen in a 
+                # tight-list when a header isn't followed by a blank line.
+                # For example:
+                #
+                #     * # Header
+                #     Line 2 of list item - not part of header.
+                sibling = self.lastChild(parent)
+                if sibling is not None:
+                    # Insert after sibling.
+                    if sibling.tail:
+                        sibling.tail = '%s\n%s' % (sibling.tail, block)
+                    else:
+                        sibling.tail = '\n%s' % block
                else:
-                    parent.text = block.lstrip()
+                    # Append to parent.text
+                    if parent.text:
+                        parent.text = '%s\n%s' % (parent.text, block)
+                    else:
+                        parent.text = block.lstrip()
            else:
                # Create a regular paragraph
-                p = markdown.etree.SubElement(parent, 'p')
+                p = util.etree.SubElement(parent, 'p')
                p.text = block.lstrip()
--- a/src/calibre/ebooks/markdown/commandline.py
+++ b/src/calibre/ebooks/markdown/commandline.py
@ -1,96 +0,0 @@
-"""
-COMMAND-LINE SPECIFIC STUFF
-=============================================================================
-
-The rest of the code is specifically for handling the case where Python
-Markdown is called from the command line.
-"""
-
-import markdown
-import sys
-import logging
-from logging import DEBUG, INFO, CRITICAL
-
-EXECUTABLE_NAME_FOR_USAGE = "python markdown.py"
-""" The name used in the usage statement displayed for python versions < 2.3.
-(With python 2.3 and higher the usage statement is generated by optparse
-and uses the actual name of the executable called.) """
-
-OPTPARSE_WARNING = """
-Python 2.3 or higher required for advanced command line options.
-For lower versions of Python use:
-
-      %s INPUT_FILE > OUTPUT_FILE
-
-""" % EXECUTABLE_NAME_FOR_USAGE
-
-def parse_options():
-    """
-    Define and parse `optparse` options for command-line usage.
-    """
-
-    try:
-        optparse = __import__("optparse")
-    except:
-        if len(sys.argv) == 2:
-            return {'input': sys.argv[1],
-                    'output': None,
-                    'safe': False,
-                    'extensions': [],
-                    'encoding': None }, CRITICAL
-        else:
-            print OPTPARSE_WARNING
-            return None, None
-
-    parser = optparse.OptionParser(usage="%prog INPUTFILE [options]")
-    parser.add_option("-f", "--file", dest="filename", default=sys.stdout,
-                      help="write output to OUTPUT_FILE",
-                      metavar="OUTPUT_FILE")
-    parser.add_option("-e", "--encoding", dest="encoding",
-                      help="encoding for input and output files",)
-    parser.add_option("-q", "--quiet", default = CRITICAL,
-                      action="store_const", const=CRITICAL+10, dest="verbose",
-                      help="suppress all messages")
-    parser.add_option("-v", "--verbose",
-                      action="store_const", const=INFO, dest="verbose",
-                      help="print info messages")
-    parser.add_option("-s", "--safe", dest="safe", default=False,
-                      metavar="SAFE_MODE",
-                      help="safe mode ('replace', 'remove' or 'escape'  user's HTML tag)")
-    parser.add_option("-o", "--output_format", dest="output_format",
-                      default='xhtml1', metavar="OUTPUT_FORMAT",
-                      help="Format of output. One of 'xhtml1' (default) or 'html4'.")
-    parser.add_option("--noisy",
-                      action="store_const", const=DEBUG, dest="verbose",
-                      help="print debug messages")
-    parser.add_option("-x", "--extension", action="append", dest="extensions",
-                      help = "load extension EXTENSION", metavar="EXTENSION")
-
-    (options, args) = parser.parse_args()
-
-    if not len(args) == 1:
-        parser.print_help()
-        return None, None
-    else:
-        input_file = args[0]
-
-    if not options.extensions:
-        options.extensions = []
-
-    return {'input': input_file,
-            'output': options.filename,
-            'safe_mode': options.safe,
-            'extensions': options.extensions,
-            'encoding': options.encoding,
-            'output_format': options.output_format}, options.verbose
-
-def run():
-    """Run Markdown from the command line."""
-
-    # Parse options and adjust logging level if necessary
-    options, logging_level = parse_options()
-    if not options: sys.exit(0)
-    if logging_level: logging.getLogger('MARKDOWN').setLevel(logging_level)
-
-    # Run
-    markdown.markdownFromFile(**options)
--- a/src/calibre/ebooks/markdown/etree_loader.py
+++ b/src/calibre/ebooks/markdown/etree_loader.py
@ -1,35 +0,0 @@
-
-from markdown import message, CRITICAL
-import sys
-
-## Import
-def importETree():
-    """Import the best implementation of ElementTree, return a module object."""
-    etree_in_c = None
-    try: # Is it Python 2.5+ with C implemenation of ElementTree installed?
-        import xml.etree.cElementTree as etree_in_c
-        etree_in_c
-    except ImportError:
-        try: # Is it Python 2.5+ with Python implementation of ElementTree?
-            import xml.etree.ElementTree as etree
-            etree
-        except ImportError:
-            try: # An earlier version of Python with cElementTree installed?
-                import cElementTree as etree_in_c
-            except ImportError:
-                try: # An earlier version of Python with Python ElementTree?
-                    import elementtree.ElementTree as etree
-                except ImportError:
-                    message(CRITICAL, "Failed to import ElementTree")
-                    sys.exit(1)
-    if etree_in_c and etree_in_c.VERSION < "1.0":
-        message(CRITICAL, "For cElementTree version 1.0 or higher is required.")
-        sys.exit(1)
-    elif etree_in_c :
-        return etree_in_c
-    elif etree.VERSION < "1.1":
-        message(CRITICAL, "For ElementTree version 1.1 or higher is required")
-        sys.exit(1)
-    else :
-        return etree
-
--- a/src/calibre/ebooks/markdown/extensions/init.py
+++ b/src/calibre/ebooks/markdown/extensions/init.py
@ -0,0 +1,53 @@
+"""
+Extensions
+-----------------------------------------------------------------------------
+"""
+
+from __future__ import unicode_literals
+
+class Extension(object):
+    """ Base class for extensions to subclass. """
+    def __init__(self, configs = {}):
+        """Create an instance of an Extension.
+
+        Keyword arguments:
+
+        * configs: A dict of configuration settings used by an Extension.
+        """
+        self.config = configs
+
+    def getConfig(self, key, default=''):
+        """ Return a setting for the given key, or `default` (an empty string if not given). """
+        if key in self.config:
+            return self.config[key][0]
+        else:
+            return default
+
+    def getConfigs(self):
+        """ Return all configs settings as a dict. """
+        return dict([(key, self.getConfig(key)) for key in self.config.keys()])
+
+    def getConfigInfo(self):
+        """ Return all config descriptions as a list of tuples. """
+        return [(key, self.config[key][1]) for key in self.config.keys()]
+
+    def setConfig(self, key, value):
+        """ Set a config setting for `key` with the given `value`. """
+        self.config[key][0] = value
+
+    def extendMarkdown(self, md, md_globals):
+        """
+        Add the various processors and patterns to the Markdown Instance.
+
+        This method must be overridden by every extension.
+
+        Keyword arguments:
+
+        * md: The Markdown instance.
+
+        * md_globals: Global variables in the markdown module namespace.
+
+        """
+        raise NotImplementedError('Extension "%s.%s" must define an "extendMarkdown"' \
+            'method.' % (self.__class__.__module__, self.__class__.__name__))
+
--- a/src/calibre/ebooks/markdown/extensions/abbr.py
+++ b/src/calibre/ebooks/markdown/extensions/abbr.py
@ -13,8 +13,8 @@ Simple Usage:
    ... *[ABBR]: Abbreviation
    ... *[REF]: Abbreviation Reference
    ... """
-    >>> markdown.markdown(text, ['abbr'])
-    u'<p>Some text with an <abbr title="Abbreviation">ABBR</abbr> and a <abbr title="Abbreviation Reference">REF</abbr>. Ignore REFERENCE and ref.</p>'
+    >>> print markdown.markdown(text, ['abbr'])
+    <p>Some text with an <abbr title="Abbreviation">ABBR</abbr> and a <abbr title="Abbreviation Reference">REF</abbr>. Ignore REFERENCE and ref.</p>

 Copyright 2007-2008
 * [Waylan Limberg](http://achinghead.com/)
@ -23,14 +23,18 @@ Copyright 2007-2008

 '''

+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..preprocessors import Preprocessor
+from ..inlinepatterns import Pattern
+from ..util import etree
 import re
-import calibre.ebooks.markdown.markdown as markdown
-from calibre.ebooks.markdown.markdown import etree

 # Global Vars
 ABBR_REF_RE = re.compile(r'[*]\[(?P<abbr>[^\]]*)\][ ]?:\s*(?P<title>.*)')

-class AbbrExtension(markdown.Extension):
+class AbbrExtension(Extension):
    """ Abbreviation Extension for Python-Markdown. """

    def extendMarkdown(self, md, md_globals):
@ -38,7 +42,7 @@ class AbbrExtension(markdown.Extension):
        md.preprocessors.add('abbr', AbbrPreprocessor(md), '<reference')
        
           
-class AbbrPreprocessor(markdown.preprocessors.Preprocessor):
+class AbbrPreprocessor(Preprocessor):
    """ Abbreviation Preprocessor - parse text for abbr references. """

    def run(self, lines):
@ -75,11 +79,11 @@ class AbbrPreprocessor(markdown.preprocessors.Preprocessor):
        return r'(?P<abbr>\b%s\b)' % (r''.join(chars))


-class AbbrPattern(markdown.inlinepatterns.Pattern):
+class AbbrPattern(Pattern):
    """ Abbreviation inline pattern. """

    def __init__(self, pattern, title):
-        markdown.inlinepatterns.Pattern.__init__(self, pattern)
+        super(AbbrPattern, self).__init__(pattern)
        self.title = title

    def handleMatch(self, m):
@ -90,7 +94,3 @@ class AbbrPattern(markdown.inlinepatterns.Pattern):

 def makeExtension(configs=None):
    return AbbrExtension(configs=configs)
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
--- a/src/calibre/ebooks/markdown/extensions/admonition.py
+++ b/src/calibre/ebooks/markdown/extensions/admonition.py
@ -0,0 +1,118 @@
+"""
+Admonition extension for Python-Markdown
+========================================
+
+Adds rST-style admonitions. Inspired by [rST][] feature with the same name.
+
+The syntax is (followed by an indented block with the contents):
+    !!! [type] [optional explicit title]
+
+Where `type` is used as a CSS class name of the div. If not present, `title`
+defaults to the capitalized `type`, so "note" -> "Note".
+
+rST suggests the following `types`, but you're free to use whatever you want:
+    attention, caution, danger, error, hint, important, note, tip, warning
+
+
+A simple example:
+    !!! note
+        This is the first line inside the box.
+
+Outputs:
+    <div class="admonition note">
+    <p class="admonition-title">Note</p>
+    <p>This is the first line inside the box.</p>
+    </div>
+
+You can also specify the title and CSS class of the admonition:
+    !!! custom "Did you know?"
+        Another line here.
+
+Outputs:
+    <div class="admonition custom">
+    <p class="admonition-title">Did you know?</p>
+    <p>Another line here.</p>
+    </div>
+
+[rST]: http://docutils.sourceforge.net/docs/ref/rst/directives.html#specific-admonitions
+
+By [Tiago Serafim](http://www.tiagoserafim.com/).
+
+"""
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..blockprocessors import BlockProcessor
+from ..util import etree
+import re
+
+
+class AdmonitionExtension(Extension):
+    """ Admonition extension for Python-Markdown. """
+
+    def extendMarkdown(self, md, md_globals):
+        """ Add Admonition to Markdown instance. """
+        md.registerExtension(self)
+
+        md.parser.blockprocessors.add('admonition',
+                                      AdmonitionProcessor(md.parser),
+                                      '_begin')
+
+
+class AdmonitionProcessor(BlockProcessor):
+
+    CLASSNAME = 'admonition'
+    CLASSNAME_TITLE = 'admonition-title'
+    RE = re.compile(r'(?:^|\n)!!!\ ?([\w\-]+)(?:\ "(.*?)")?')
+
+    def test(self, parent, block):
+        sibling = self.lastChild(parent)
+        return self.RE.search(block) or \
+            (block.startswith(' ' * self.tab_length) and sibling and \
+                sibling.get('class', '').find(self.CLASSNAME) != -1)
+
+    def run(self, parent, blocks):
+        sibling = self.lastChild(parent)
+        block = blocks.pop(0)
+        m = self.RE.search(block)
+
+        if m:
+            block = block[m.end() + 1:]  # removes the first line
+
+        block, theRest = self.detab(block)
+
+        if m:
+            klass, title = self.get_class_and_title(m)
+            div = etree.SubElement(parent, 'div')
+            div.set('class', '%s %s' % (self.CLASSNAME, klass))
+            if title:
+                p = etree.SubElement(div, 'p')
+                p.text = title
+                p.set('class', self.CLASSNAME_TITLE)
+        else:
+            div = sibling
+
+        self.parser.parseChunk(div, block)
+
+        if theRest:
+            # This block contained unindented line(s) after the first indented
+            # line. Insert these lines as the first block of the master blocks
+            # list for future processing.
+            blocks.insert(0, theRest)
+
+    def get_class_and_title(self, match):
+        klass, title = match.group(1).lower(), match.group(2)
+        if title is None:
+            # no title was provided, use the capitalized classname as title
+            # e.g.: `!!! note` will render `<p class="admonition-title">Note</p>`
+            title = klass.capitalize()
+        elif title == '':
+            # an explicit blank title should not be rendered
+            # e.g.: `!!! warning ""` will *not* render `p` with a title
+            title = None
+        return klass, title
+
+
+def makeExtension(configs={}):
+    return AdmonitionExtension(configs=configs)
--- a/src/calibre/ebooks/markdown/extensions/attr_list.py
+++ b/src/calibre/ebooks/markdown/extensions/attr_list.py
@ -0,0 +1,140 @@
+"""
+Attribute List Extension for Python-Markdown
+============================================
+
+Adds attribute list syntax. Inspired by 
+[maruku](http://maruku.rubyforge.org/proposal.html#attribute_lists)'s
+feature of the same name.
+
+Copyright 2011 [Waylan Limberg](http://achinghead.com/).
+
+Contact: markdown@freewisdom.org
+
+License: BSD (see ../LICENSE.md for details) 
+
+Dependencies:
+* [Python 2.4+](http://python.org)
+* [Markdown 2.1+](http://packages.python.org/Markdown/)
+
+"""
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..treeprocessors import Treeprocessor
+from ..util import isBlockLevel
+import re
+
+try:
+    Scanner = re.Scanner
+except AttributeError:
+    # must be on Python 2.4
+    from sre import Scanner
+
+def _handle_double_quote(s, t):
+    k, v = t.split('=')
+    return k, v.strip('"')
+
+def _handle_single_quote(s, t):
+    k, v = t.split('=')
+    return k, v.strip("'")
+
+def _handle_key_value(s, t): 
+    return t.split('=')
+
+def _handle_word(s, t):
+    if t.startswith('.'):
+        return '.', t[1:]
+    if t.startswith('#'):
+        return 'id', t[1:]
+    return t, t
+
+_scanner = Scanner([
+    (r'[^ ]+=".*?"', _handle_double_quote),
+    (r"[^ ]+='.*?'", _handle_single_quote),
+    (r'[^ ]+=[^ ]*', _handle_key_value),
+    (r'[^ ]+', _handle_word),
+    (r' ', None)
+])
+
+def get_attrs(str):
+    """ Parse attribute list and return a list of attribute tuples. """
+    return _scanner.scan(str)[0]
+
+def isheader(elem):
+    return elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']
+
+class AttrListTreeprocessor(Treeprocessor):
+    
+    BASE_RE = r'\{\:?([^\}]*)\}'
+    HEADER_RE = re.compile(r'[ ]*%s[ ]*$' % BASE_RE)
+    BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
+    INLINE_RE = re.compile(r'^%s' % BASE_RE)
+    NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff\u0370-\u037d'
+                         r'\u037f-\u1fff\u200c-\u200d\u2070-\u218f\u2c00-\u2fef'
+                         r'\u3001-\ud7ff\uf900-\ufdcf\ufdf0-\ufffd'
+                         r'\:\-\.0-9\u00b7\u0300-\u036f\u203f-\u2040]+')
+
+    def run(self, doc):
+        for elem in doc.getiterator():
+            if isBlockLevel(elem.tag):
+                # Block level: check for attrs on last line of text
+                RE = self.BLOCK_RE
+                if isheader(elem):
+                    # header: check for attrs at end of line
+                    RE = self.HEADER_RE
+                if len(elem) and elem[-1].tail:
+                    # has children. Get from tail of last child
+                    m = RE.search(elem[-1].tail)
+                    if m:
+                        self.assign_attrs(elem, m.group(1))
+                        elem[-1].tail = elem[-1].tail[:m.start()]
+                        if isheader(elem):
+                            # clean up trailing #s
+                            elem[-1].tail = elem[-1].tail.rstrip('#').rstrip()
+                elif elem.text:
+                    # no children. Get from text.
+                    m = RE.search(elem.text)
+                    if m:
+                        self.assign_attrs(elem, m.group(1))
+                        elem.text = elem.text[:m.start()]
+                        if isheader(elem):
+                            # clean up trailing #s
+                            elem.text = elem.text.rstrip('#').rstrip()
+            else:
+                # inline: check for attrs at start of tail
+                if elem.tail:
+                    m = self.INLINE_RE.match(elem.tail)
+                    if m:
+                        self.assign_attrs(elem, m.group(1))
+                        elem.tail = elem.tail[m.end():]
+
+    def assign_attrs(self, elem, attrs):
+        """ Assign attrs to element. """
+        for k, v in get_attrs(attrs):
+            if k == '.':
+                # add to class
+                cls = elem.get('class')
+                if cls:
+                    elem.set('class', '%s %s' % (cls, v))
+                else:
+                    elem.set('class', v)
+            else:
+                # assign attr k with v
+                elem.set(self.sanitize_name(k), v)
+
+    def sanitize_name(self, name):
+        """
+        Sanitize name as 'an XML Name, minus the ":"'.
+        See http://www.w3.org/TR/REC-xml-names/#NT-NCName
+        """
+        return self.NAME_RE.sub('_', name)
+
+
+class AttrListExtension(Extension):
+    def extendMarkdown(self, md, md_globals):
+        md.treeprocessors.add('attr_list', AttrListTreeprocessor(md), '>prettify')
+
+
+def makeExtension(configs={}):
+    return AttrListExtension(configs=configs)
--- a/src/calibre/ebooks/markdown/extensions/codehilite.py
+++ b/src/calibre/ebooks/markdown/extensions/codehilite.py
@ -1,5 +1,3 @@
-#!/usr/bin/python
-
 """
 CodeHilite Extension for Python-Markdown
 ========================================
@ -8,30 +6,33 @@ Adds code/syntax highlighting to standard Python-Markdown code blocks.

 Copyright 2006-2008 [Waylan Limberg](http://achinghead.com/).

-Project website: <http://www.freewisdom.org/project/python-markdown/CodeHilite>
+Project website: <http://packages.python.org/Markdown/extensions/code_hilite.html>
 Contact: markdown@freewisdom.org

-License: BSD (see ../docs/LICENSE for details)
+License: BSD (see ../LICENSE.md for details)

 Dependencies:
 * [Python 2.3+](http://python.org/)
-* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)
+* [Markdown 2.0+](http://packages.python.org/Markdown/)
 * [Pygments](http://pygments.org/)

 """

-import calibre.ebooks.markdown.markdown as markdown
-
-# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY -----------------
-
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..treeprocessors import Treeprocessor
+import warnings
 try:
-    TAB_LENGTH = markdown.TAB_LENGTH
-except AttributeError:
-    TAB_LENGTH = 4
-
+    from pygments import highlight
+    from pygments.lexers import get_lexer_by_name, guess_lexer, TextLexer
+    from pygments.formatters import HtmlFormatter
+    pygments = True
+except ImportError:
+    pygments = False

 # ------------------ The Main CodeHilite Class ----------------------
-class CodeHilite:
+class CodeHilite(object):
    """
    Determine language of source code, and pass it into the pygments hilighter.

@ -41,7 +42,10 @@ class CodeHilite:

    * src: Source string or any object with a .readline attribute.

-    * linenos: (Boolen) Turn line numbering 'on' or 'off' (off by default).
+    * linenums: (Boolean) Set line numbering to 'on' (True), 'off' (False) or 'auto' (None).
+    Set to 'auto' by default.
+
+    * guess_lang: (Boolean) Turn language auto-detection 'on' or 'off' (on by default).

    * css_class: Set class name of wrapper div ('codehilite' by default).

@ -53,11 +57,17 @@ class CodeHilite:

    """

-    def __init__(self, src=None, linenos=False, css_class="codehilite"):
+    def __init__(self, src=None, linenums=None, guess_lang=True,
+                css_class="codehilite", lang=None, style='default',
+                noclasses=False, tab_length=4):
        self.src = src
-        self.lang = None
-        self.linenos = linenos
+        self.lang = lang
+        self.linenums = linenums
+        self.guess_lang = guess_lang
        self.css_class = css_class
+        self.style = style
+        self.noclasses = noclasses
+        self.tab_length = tab_length

    def hilite(self):
        """
@ -72,64 +82,47 @@ class CodeHilite:

        self.src = self.src.strip('\n')

-        self._getLang()
+        if self.lang is None:
+            self._getLang()

-        try:
-            from pygments import highlight
-            from pygments.lexers import get_lexer_by_name, guess_lexer, \
-                                        TextLexer
-            from pygments.formatters import HtmlFormatter
-        except ImportError:
-            # just escape and pass through
-            txt = self._escape(self.src)
-            if self.linenos:
-                txt = self._number(txt)
-            else :
-                txt = '<div class="%s"><pre>%s</pre></div>\n'% \
-                        (self.css_class, txt)
-            return txt
-        else:
+        if pygments:
            try:
                lexer = get_lexer_by_name(self.lang)
            except ValueError:
                try:
-                    lexer = guess_lexer(self.src)
+                    if self.guess_lang:
+                        lexer = guess_lexer(self.src)
+                    else:
+                        lexer = TextLexer()
                except ValueError:
                    lexer = TextLexer()
-            formatter = HtmlFormatter(linenos=self.linenos, 
-                                      cssclass=self.css_class)
+            formatter = HtmlFormatter(linenos=self.linenums,
+                                      cssclass=self.css_class,
+                                      style=self.style,
+                                      noclasses=self.noclasses)
            return highlight(self.src, lexer, formatter)
-
-    def _escape(self, txt):
-        """ basic html escaping """
-        txt = txt.replace('&', '&amp;')
-        txt = txt.replace('<', '&lt;')
-        txt = txt.replace('>', '&gt;')
-        txt = txt.replace('"', '&quot;')
-        return txt
-
-    def _number(self, txt):
-        """ Use <ol> for line numbering """
-        # Fix Whitespace
-        txt = txt.replace('\t', ' '*TAB_LENGTH)
-        txt = txt.replace(" "*4, "&nbsp; &nbsp; ")
-        txt = txt.replace(" "*3, "&nbsp; &nbsp;")
-        txt = txt.replace(" "*2, "&nbsp; ")        
-        
-        # Add line numbers
-        lines = txt.splitlines()
-        txt = '<div class="codehilite"><pre><ol>\n'
-        for line in lines:
-            txt += '\t<li>%s</li>\n'% line
-        txt += '</ol></pre></div>\n'
-        return txt
-
+        else:
+            # just escape and build markup usable by JS highlighting libs
+            txt = self.src.replace('&', '&amp;')
+            txt = txt.replace('<', '&lt;')
+            txt = txt.replace('>', '&gt;')
+            txt = txt.replace('"', '&quot;')
+            classes = []
+            if self.lang:
+                classes.append('language-%s' % self.lang)
+            if self.linenums:
+                classes.append('linenums')
+            class_str = ''
+            if classes:
+                class_str = ' class="%s"' % ' '.join(classes) 
+            return '<pre class="%s"><code%s>%s</code></pre>\n'% \
+                        (self.css_class, class_str, txt)

    def _getLang(self):
        """
-        Determines language of a code block from shebang lines and whether said
+        Determines language of a code block from shebang line and whether said
        line should be removed or left in place. If the sheband line contains a
-        path (even a single /) then it is assumed to be a real shebang lines and
+        path (even a single /) then it is assumed to be a real shebang line and
        left alone. However, if no path is given (e.i.: #!python or :::python)
        then it is assumed to be a mock shebang for language identifitation of a
        code fragment and removed from the code block prior to processing for
@ -148,7 +141,7 @@ class CodeHilite:
        fl = lines.pop(0)

        c = re.compile(r'''
-            (?:(?:::+)|(?P<shebang>[#]!))	# Shebang or 2 or more colons.
+            (?:(?:^::+)|(?P<shebang>^[#]!))	# Shebang or 2 or more colons.
            (?P<path>(?:/\w+)*[/ ])?        # Zero or 1 path
            (?P<lang>[\w+-]*)               # The language
            ''',  re.VERBOSE)
@ -163,9 +156,9 @@ class CodeHilite:
            if m.group('path'):
                # path exists - restore first line
                lines.insert(0, fl)
-            if m.group('shebang'):
-                # shebang exists - use line numbers
-                self.linenos = True
+            if self.linenums is None and m.group('shebang'):
+                # linenums not set explicitly (None) and a shebang exists - use line numbers
+                self.linenums = True
        else:
            # No match
            lines.insert(0, fl)
@ -175,7 +168,7 @@ class CodeHilite:


 # ------------------ The Markdown Extension -------------------------------
-class HiliteTreeprocessor(markdown.treeprocessors.Treeprocessor):
+class HiliteTreeprocessor(Treeprocessor):
    """ Hilight source code in code blocks. """

    def run(self, root):
@ -185,8 +178,12 @@ class HiliteTreeprocessor(markdown.treeprocessors.Treeprocessor):
            children = block.getchildren()
            if len(children) == 1 and children[0].tag == 'code':
                code = CodeHilite(children[0].text,
-                            linenos=self.config['force_linenos'][0],
-                            css_class=self.config['css_class'][0])
+                            linenums=self.config['linenums'],
+                            guess_lang=self.config['guess_lang'],
+                            css_class=self.config['css_class'],
+                            style=self.config['pygments_style'],
+                            noclasses=self.config['noclasses'],
+                            tab_length=self.markdown.tab_length)
                placeholder = self.markdown.htmlStash.store(code.hilite(),
                                                            safe=True)
                # Clear codeblock in etree instance
@ -197,26 +194,45 @@ class HiliteTreeprocessor(markdown.treeprocessors.Treeprocessor):
                block.text = placeholder


-class CodeHiliteExtension(markdown.Extension):
+class CodeHiliteExtension(Extension):
    """ Add source code hilighting to markdown codeblocks. """

    def __init__(self, configs):
+        """ Set default options, then apply the (key, value) pairs in `configs`.
+
+        The string values 'True', 'False' and 'None' are converted to the
+        corresponding Python constants before being stored. The old
+        'force_linenos' key is still accepted: it emits a
+        PendingDeprecationWarning and, when truthy, also turns 'linenums' on.
+        """
        # define default configs
        self.config = {
-            'force_linenos' : [False, "Force line numbers - Default: False"],
+            'linenums': [None, "Use line numbers. True=yes, False=no, None=auto"],
+            'force_linenos' : [False, "Deprecated! Use 'linenums' instead. Force line numbers - Default: False"],
+            'guess_lang' : [True, "Automatic language detection - Default: True"],
            'css_class' : ["codehilite",
                           "Set class name for wrapper <div> - Default: codehilite"],
+            'pygments_style' : ['default', 'Pygments HTML Formatter Style (Colorscheme) - Default: default'],
+            'noclasses': [False, 'Use inline styles instead of CSS classes - Default false']
            }

        # Override defaults with user settings
        for key, value in configs:
+            # convert the strings 'True'/'False'/'None' to Python constants
+            if value == 'True': value = True
+            if value == 'False': value = False
+            if value == 'None': value = None
+
+            if key == 'force_linenos':
+                warnings.warn('The "force_linenos" config setting'
+                    ' to the CodeHilite extension is deprecated.'
+                    ' Use "linenums" instead.', PendingDeprecationWarning)
+                if value:
+                    # Carry 'force_linenos' over to new 'linenums'.
+                    self.setConfig('linenums', True)
+
            self.setConfig(key, value)

    def extendMarkdown(self, md, md_globals):
        """ Add HilitePostprocessor to Markdown instance. """
        hiliter = HiliteTreeprocessor(md)
-        hiliter.config = self.config
-        md.treeprocessors.add("hilite", hiliter, "_begin") 
+        hiliter.config = self.getConfigs()
+        md.treeprocessors.add("hilite", hiliter, "<inline")
+
+        md.registerExtension(self)


 def makeExtension(configs={}):
--- a/src/calibre/ebooks/markdown/extensions/def_list.py
+++ b/src/calibre/ebooks/markdown/extensions/def_list.py
@ -1,4 +1,3 @@
-#!/usr/bin/env Python
 """
 Definition List Extension for Python-Markdown
 =============================================
@ -19,30 +18,44 @@ Copyright 2008 - [Waylan Limberg](http://achinghead.com)

 """

+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..blockprocessors import BlockProcessor, ListIndentProcessor
+from ..util import etree
 import re
-import calibre.ebooks.markdown.markdown as markdown
-from calibre.ebooks.markdown.markdown import etree


-class DefListProcessor(markdown.blockprocessors.BlockProcessor):
+class DefListProcessor(BlockProcessor):
    """ Process Definition Lists. """

    RE = re.compile(r'(^|\n)[ ]{0,3}:[ ]{1,3}(.*?)(\n|$)')
+    NO_INDENT_RE = re.compile(r'^[ ]{0,3}[^ :]')

    def test(self, parent, block):
        return bool(self.RE.search(block))

    def run(self, parent, blocks):
-        block = blocks.pop(0)
-        m = self.RE.search(block)
-        terms = [l.strip() for l in block[:m.start()].split('\n') if l.strip()]
-        d, theRest = self.detab(block[m.end():])
+
+        raw_block = blocks.pop(0)
+        m = self.RE.search(raw_block)
+        terms = [l.strip() for l in raw_block[:m.start()].split('\n') if l.strip()]
+        block = raw_block[m.end():]
+        no_indent = self.NO_INDENT_RE.match(block)
+        if no_indent:
+            d, theRest = (block, None)
+        else:
+            d, theRest = self.detab(block)
        if d:
            d = '%s\n%s' % (m.group(2), d)
        else:
            d = m.group(2)
-        #import ipdb; ipdb.set_trace()
        sibling = self.lastChild(parent)
+        if not terms and sibling is None:
+            # This is not a definition item. Most likely a paragraph that
+            # starts with a colon at the beginning of a document or list.
+            blocks.insert(0, raw_block)
+            return False
        if not terms and sibling.tag == 'p':
            # The previous paragraph contains the terms
            state = 'looselist'
@ -74,7 +87,7 @@ class DefListProcessor(markdown.blockprocessors.BlockProcessor):
        if theRest:
            blocks.insert(0, theRest)

-class DefListIndentProcessor(markdown.blockprocessors.ListIndentProcessor):
+class DefListIndentProcessor(ListIndentProcessor):
    """ Process indented children of definition list items. """

    ITEM_TYPES = ['dd']
@ -82,12 +95,12 @@ class DefListIndentProcessor(markdown.blockprocessors.ListIndentProcessor):

    def create_item(self, parent, block):
        """ Create a new dd and parse the block with it as the parent. """
-        dd = markdown.etree.SubElement(parent, 'dd')
+        dd = etree.SubElement(parent, 'dd')
        self.parser.parseBlocks(dd, [block])
 


-class DefListExtension(markdown.Extension):
+class DefListExtension(Extension):
    """ Add definition lists to Markdown. """

    def extendMarkdown(self, md, md_globals):
--- a/src/calibre/ebooks/markdown/extensions/extra.py
+++ b/src/calibre/ebooks/markdown/extensions/extra.py
@ -1,4 +1,3 @@
-#!/usr/bin/env python
 """
 Python-Markdown Extra Extension
 ===============================
@ -27,23 +26,29 @@ when you upgrade to any future version of Python-Markdown.

 """

-import calibre.ebooks.markdown.markdown as markdown
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension

-extensions = ['fenced_code',
+extensions = ['smart_strong',
+              'fenced_code',
              'footnotes',
-              'headerid',
+              'attr_list',
              'def_list',
              'tables',
              'abbr',
              ]
              

-class ExtraExtension(markdown.Extension):
+class ExtraExtension(Extension):
    """ Add various extensions to Markdown class."""

    def extendMarkdown(self, md, md_globals):
        """ Register extension instances. """
        md.registerExtensions(extensions, self.config)
+        if not md.safeMode:
+            # Turn on processing of markdown text within raw html
+            md.preprocessors['html_block'].markdown_in_raw = True

 def makeExtension(configs={}):
    return ExtraExtension(configs=dict(configs))
--- a/src/calibre/ebooks/markdown/extensions/fenced_code.py
+++ b/src/calibre/ebooks/markdown/extensions/fenced_code.py
@ -1,5 +1,3 @@
-#!/usr/bin/env python
-
 """
 Fenced Code Extension for Python Markdown
 =========================================
@ -15,13 +13,17 @@ This extension adds Fenced Code Blocks to Python-Markdown.
    ... ~~~
    ... '''
    >>> html = markdown.markdown(text, extensions=['fenced_code'])
-    >>> html
-    u'<p>A paragraph before a fenced code block:</p>\\n<pre><code>Fenced code block\\n</code></pre>'
+    >>> print html
+    <p>A paragraph before a fenced code block:</p>
+    <pre><code>Fenced code block
+    </code></pre>

 Works with safe_mode also (we check this because we are using the HtmlStash):

-    >>> markdown.markdown(text, extensions=['fenced_code'], safe_mode='replace')
-    u'<p>A paragraph before a fenced code block:</p>\\n<pre><code>Fenced code block\\n</code></pre>'
+    >>> print markdown.markdown(text, extensions=['fenced_code'], safe_mode='replace')
+    <p>A paragraph before a fenced code block:</p>
+    <pre><code>Fenced code block
+    </code></pre>

 Include tilde's in a code block and wrap with blank lines:

@ -29,63 +31,94 @@ Include tilde's in a code block and wrap with blank lines:
    ... ~~~~~~~~
    ...
    ... ~~~~
-    ... 
    ... ~~~~~~~~'''
-    >>> markdown.markdown(text, extensions=['fenced_code'])
-    u'<pre><code>\\n~~~~\\n\\n</code></pre>'
+    >>> print markdown.markdown(text, extensions=['fenced_code'])
+    <pre><code>
+    ~~~~
+    </code></pre>

-Multiple blocks and language tags:
+Language tags:

    >>> text = '''
    ... ~~~~{.python}
-    ... block one
-    ... ~~~~
-    ... 
-    ... ~~~~.html
-    ... <p>block two</p>
+    ... # Some python code
    ... ~~~~'''
-    >>> markdown.markdown(text, extensions=['fenced_code'])
-    u'<pre><code class="python">block one\\n</code></pre>\\n\\n<pre><code class="html">&lt;p&gt;block two&lt;/p&gt;\\n</code></pre>'
+    >>> print markdown.markdown(text, extensions=['fenced_code'])
+    <pre><code class="python"># Some python code
+    </code></pre>
+
+Backticks may be used instead of tildes, matching GitHub's fenced code block syntax:
+
+    >>> text = '''
+    ... `````
+    ... # Arbitrary code
+    ... ~~~~~ # these tildes will not close the block
+    ... `````'''
+    >>> print markdown.markdown(text, extensions=['fenced_code'])
+    <pre><code># Arbitrary code
+    ~~~~~ # these tildes will not close the block
+    </code></pre>

 Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/).

-Project website: <http://www.freewisdom.org/project/python-markdown/Fenced__Code__Blocks>
+Project website: <http://packages.python.org/Markdown/extensions/fenced_code_blocks.html>
 Contact: markdown@freewisdom.org

 License: BSD (see ../docs/LICENSE for details)

 Dependencies:
-* [Python 2.3+](http://python.org)
-* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)
+* [Python 2.4+](http://python.org)
+* [Markdown 2.0+](http://packages.python.org/Markdown/)
+* [Pygments (optional)](http://pygments.org)

 """

+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..preprocessors import Preprocessor
+from .codehilite import CodeHilite, CodeHiliteExtension
 import re
-import calibre.ebooks.markdown.markdown as markdown

 # Global vars
 FENCED_BLOCK_RE = re.compile( \
-    r'(?P<fence>^~{3,})[ ]*(\{?\.(?P<lang>[a-zA-Z0-9_-]*)\}?)?[ ]*\n(?P<code>.*?)(?P=fence)[ ]*$', 
+    r'(?P<fence>^(?:~{3,}|`{3,}))[ ]*(\{?\.?(?P<lang>[a-zA-Z0-9_+-]*)\}?)?[ ]*\n(?P<code>.*?)(?<=\n)(?P=fence)[ ]*$',
    re.MULTILINE|re.DOTALL
    )
 CODE_WRAP = '<pre><code%s>%s</code></pre>'
 LANG_TAG = ' class="%s"'

-
-class FencedCodeExtension(markdown.Extension):
+class FencedCodeExtension(Extension):

    def extendMarkdown(self, md, md_globals):
        """ Add FencedBlockPreprocessor to the Markdown instance. """
+        md.registerExtension(self)

        md.preprocessors.add('fenced_code_block',
                                 FencedBlockPreprocessor(md),
-                                 "_begin")
+                                 ">normalize_whitespace")


-class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):
+class FencedBlockPreprocessor(Preprocessor):
+
+    def __init__(self, md):
+        """ Bind to `md` and start with no cached CodeHilite settings. """
+        super(FencedBlockPreprocessor, self).__init__(md)
+
+        # run() looks for a registered CodeHilite extension on its first
+        # call and records the outcome in these two attributes
+        self.codehilite_conf = dict()
+        self.checked_for_codehilite = False

    def run(self, lines):
        """ Match and store Fenced Code Blocks in the HtmlStash. """
+
+        # Check for code hilite extension
+        if not self.checked_for_codehilite:
+            for ext in self.markdown.registeredExtensions:
+                if isinstance(ext, CodeHiliteExtension):
+                    self.codehilite_conf = ext.config
+                    break
+
+            self.checked_for_codehilite = True
+
        text = "\n".join(lines)
        while 1:
            m = FENCED_BLOCK_RE.search(text)
@ -93,7 +126,22 @@ class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):
                lang = ''
                if m.group('lang'):
                    lang = LANG_TAG % m.group('lang')
-                code = CODE_WRAP % (lang, self._escape(m.group('code')))
+
+                # If config is not empty, then the codehilite extension
+                # is enabled, so we call it to highlight the code
+                if self.codehilite_conf:
+                    highliter = CodeHilite(m.group('code'),
+                            linenums=self.codehilite_conf['linenums'][0],
+                            guess_lang=self.codehilite_conf['guess_lang'][0],
+                            css_class=self.codehilite_conf['css_class'][0],
+                            style=self.codehilite_conf['pygments_style'][0],
+                            lang=(m.group('lang') or None),
+                            noclasses=self.codehilite_conf['noclasses'][0])
+
+                    code = highliter.hilite()
+                else:
+                    code = CODE_WRAP % (lang, self._escape(m.group('code')))
+
                placeholder = self.markdown.htmlStash.store(code, safe=True)
                text = '%s\n%s\n%s'% (text[:m.start()], placeholder, text[m.end():])
            else:
@ -110,9 +158,4 @@ class FencedBlockPreprocessor(markdown.preprocessors.Preprocessor):


 def makeExtension(configs=None):
-    return FencedCodeExtension()
-
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
+    return FencedCodeExtension(configs=configs)
--- a/src/calibre/ebooks/markdown/extensions/footnotes.py
+++ b/src/calibre/ebooks/markdown/extensions/footnotes.py
@ -23,33 +23,55 @@ Example:

 """

+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..preprocessors import Preprocessor
+from ..inlinepatterns import Pattern
+from ..treeprocessors import Treeprocessor
+from ..postprocessors import Postprocessor
+from ..util import etree, text_type
+from ..odict import OrderedDict
 import re
-import calibre.ebooks.markdown.markdown as markdown
-from calibre.ebooks.markdown.markdown import etree

 FN_BACKLINK_TEXT = "zz1337820767766393qq"
 NBSP_PLACEHOLDER =  "qq3936677670287331zz"
-DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
+DEF_RE = re.compile(r'[ ]{0,3}\[\^([^\]]*)\]:\s*(.*)')
 TABBED_RE = re.compile(r'((\t)|(    ))(.*)')

-class FootnoteExtension(markdown.Extension):
+class FootnoteExtension(Extension):
    """ Footnote Extension. """

    def __init__ (self, configs):
        """ Setup configs. """
        self.config = {'PLACE_MARKER':
                       ["///Footnotes Go Here///",
-                        "The text string that marks where the footnotes go"]}
+                        "The text string that marks where the footnotes go"],
+                       'UNIQUE_IDS':
+                       [False,
+                        "Avoid name collisions across "
+                        "multiple calls to reset()."],
+                       "BACKLINK_TEXT":
+                       ["&#8617;",
+                        "The text string that links from the footnote to the reader's place."]
+                       }

        for key, value in configs:
            self.config[key][0] = value

+        # In multiple invocations, emit links that don't get tangled.
+        self.unique_prefix = 0
+
        self.reset()

    def extendMarkdown(self, md, md_globals):
        """ Add pieces to Markdown. """
        md.registerExtension(self)
        self.parser = md.parser
+        self.md = md
+        self.sep = ':'
+        if self.md.output_format in ['html5', 'xhtml5']:
+            self.sep = '-'
        # Insert a preprocessor before ReferencePreprocessor
        md.preprocessors.add("footnote", FootnotePreprocessor(self),
                             "<reference")
@ -58,17 +80,18 @@ class FootnoteExtension(markdown.Extension):
        md.inlinePatterns.add("footnote", FootnotePattern(FOOTNOTE_RE, self),
                              "<reference")
        # Insert a tree-processor that would actually add the footnote div
-        # This must be before the inline treeprocessor so inline patterns
-        # run on the contents of the div.
+        # This must be before all other treeprocessors (i.e., inline and
+        # codehilite) so they can run on the contents of the div.
        md.treeprocessors.add("footnote", FootnoteTreeprocessor(self),
-                                 "<inline")
+                                 "_begin")
        # Insert a postprocessor after amp_substitute oricessor
        md.postprocessors.add("footnote", FootnotePostprocessor(self),
                                  ">amp_substitute")

    def reset(self):
-        """ Clear the footnotes on reset. """
-        self.footnotes = markdown.odict.OrderedDict()
+        """ Clear the footnotes on reset, and prepare for a distinct document. """
+        self.footnotes = OrderedDict()
+        self.unique_prefix += 1

    def findFootnotesPlaceholder(self, root):
        """ Return ElementTree Element that contains Footnote placeholder. """
@ -76,10 +99,10 @@ class FootnoteExtension(markdown.Extension):
            for child in element:
                if child.text:
                    if child.text.find(self.getConfig("PLACE_MARKER")) > -1:
-                        return child, True
+                        return child, element, True
                if child.tail:
                    if child.tail.find(self.getConfig("PLACE_MARKER")) > -1:
-                        return (child, element), False
+                        return child, element, False
                finder(child)
            return None
                
@ -92,16 +115,22 @@ class FootnoteExtension(markdown.Extension):

    def makeFootnoteId(self, id):
        """ Return footnote link id. """
-        return 'fn:%s' % id
+        if self.getConfig("UNIQUE_IDS"):
+            return 'fn%s%d-%s' % (self.sep, self.unique_prefix, id)
+        else:
+            return 'fn%s%s' % (self.sep, id)

    def makeFootnoteRefId(self, id):
        """ Return footnote back-link id. """
-        return 'fnref:%s' % id
+        if self.getConfig("UNIQUE_IDS"):
+            return 'fnref%s%d-%s' % (self.sep, self.unique_prefix, id)
+        else:
+            return 'fnref%s%s' % (self.sep, id)

    def makeFootnotesDiv(self, root):
        """ Return div of footnotes as et Element. """

-        if not self.footnotes.keys():
+        if not list(self.footnotes.keys()):
            return None

        div = etree.Element("div")
@ -115,7 +144,9 @@ class FootnoteExtension(markdown.Extension):
            self.parser.parseChunk(li, self.footnotes[id])
            backlink = etree.Element("a")
            backlink.set("href", "#" + self.makeFootnoteRefId(id))
-            backlink.set("rev", "footnote")
+            if self.md.output_format not in ['html5', 'xhtml5']:
+                backlink.set("rev", "footnote") # Invalid in HTML5
+            backlink.set("class", "footnote-backref")
            backlink.set("title", "Jump back to footnote %d in the text" % \
                            (self.footnotes.index(id)+1))
            backlink.text = FN_BACKLINK_TEXT
@ -131,61 +162,39 @@ class FootnoteExtension(markdown.Extension):
        return div


-class FootnotePreprocessor(markdown.preprocessors.Preprocessor):
+class FootnotePreprocessor(Preprocessor):
    """ Find all footnote references and store for later use. """

    def __init__ (self, footnotes):
        self.footnotes = footnotes

    def run(self, lines):
-        lines = self._handleFootnoteDefinitions(lines)
-        text = "\n".join(lines)
-        return text.split("\n")
-
-    def _handleFootnoteDefinitions(self, lines):
        """
-        Recursively find all footnote definitions in lines.
+        Loop through lines and find, set, and remove footnote definitions.

        Keywords:

        * lines: A list of lines of text

-        Return: A list of lines with footnote definitions removed.
+        Return: A list of lines of text with footnote definitions removed.

        """
-        i, id, footnote = self._findFootnoteDefinition(lines)
-
-        if id :
-            plain = lines[:i]
-            detabbed, theRest = self.detectTabbed(lines[i+1:])
-            self.footnotes.setFootnote(id,
-                                       footnote + "\n"
-                                       + "\n".join(detabbed))
-            more_plain = self._handleFootnoteDefinitions(theRest)
-            return plain + [""] + more_plain
-        else :
-            return lines
-
-    def _findFootnoteDefinition(self, lines):
-        """
-        Find the parts of a footnote definition.
-
-        Keywords:
-
-        * lines: A list of lines of text.
-
-        Return: A three item tuple containing the index of the first line of a
-        footnote definition, the id of the definition and the body of the
-        definition.
-
-        """
-        counter = 0
-        for line in lines:
-            m = DEF_RE.match(line)
+        newlines = []
+        i = 0
+        while True:
+            m = DEF_RE.match(lines[i])
            if m:
-                return counter, m.group(2), m.group(3)
-            counter += 1
-        return counter, None, None
+                fn, _i = self.detectTabbed(lines[i+1:])
+                fn.insert(0, m.group(2))
+                i += _i-1 # skip past footnote
+                self.footnotes.setFootnote(m.group(1), "\n".join(fn))
+            else:
+                newlines.append(lines[i])
+            if len(lines) > i+1:
+                i += 1
+            else:
+                break
+        return newlines

    def detectTabbed(self, lines):
        """ Find indented text and remove indent before further proccesing.
@ -194,11 +203,11 @@ class FootnotePreprocessor(markdown.preprocessors.Preprocessor):

        * lines: an array of strings

-        Returns: a list of post processed items and the unused
-        remainder of the original list
+        Returns: a list of post processed items and the index of last line.

        """
        items = []
+        blank_line = False # have we encountered a blank line yet?
        i = 0 # to keep track of where we are

        def detab(line):
@ -208,15 +217,21 @@ class FootnotePreprocessor(markdown.preprocessors.Preprocessor):

        for line in lines:
            if line.strip(): # Non-blank line
-                line = detab(line)
-                if line:
+                detabbed_line = detab(line)
+                if detabbed_line:
+                    items.append(detabbed_line)
+                    i += 1
+                    continue
+                elif not blank_line and not DEF_RE.match(line):
+                    # not tabbed but still part of first par.
                    items.append(line)
                    i += 1
                    continue
                else:
-                    return items, lines[i:]
+                    return items, i+1

            else: # Blank line: _maybe_ we are done.
+                blank_line = True
                i += 1 # advance

                # Find the next non-blank line
@ -235,28 +250,33 @@ class FootnotePreprocessor(markdown.preprocessors.Preprocessor):
        else:
            i += 1

-        return items, lines[i:]
+        return items, i


-class FootnotePattern(markdown.inlinepatterns.Pattern):
+class FootnotePattern(Pattern):
    """ InlinePattern for footnote markers in a document's body text. """

    def __init__(self, pattern, footnotes):
-        markdown.inlinepatterns.Pattern.__init__(self, pattern)
+        super(FootnotePattern, self).__init__(pattern)
        self.footnotes = footnotes

    def handleMatch(self, m):
-        sup = etree.Element("sup")
-        a = etree.SubElement(sup, "a")
        id = m.group(2)
-        sup.set('id', self.footnotes.makeFootnoteRefId(id))
-        a.set('href', '#' + self.footnotes.makeFootnoteId(id))
-        a.set('rel', 'footnote')
-        a.text = str(self.footnotes.footnotes.index(id) + 1)
-        return sup
+        if id in self.footnotes.footnotes.keys():
+            sup = etree.Element("sup")
+            a = etree.SubElement(sup, "a")
+            sup.set('id', self.footnotes.makeFootnoteRefId(id))
+            a.set('href', '#' + self.footnotes.makeFootnoteId(id))
+            if self.footnotes.md.output_format not in ['html5', 'xhtml5']:
+                a.set('rel', 'footnote') # invalid in HTML5
+            a.set('class', 'footnote-ref')
+            a.text = text_type(self.footnotes.footnotes.index(id) + 1)
+            return sup
+        else:
+            return None


-class FootnoteTreeprocessor(markdown.treeprocessors.Treeprocessor):
+class FootnoteTreeprocessor(Treeprocessor):
    """ Build and append footnote div to end of document. """

    def __init__ (self, footnotes):
@ -267,23 +287,24 @@ class FootnoteTreeprocessor(markdown.treeprocessors.Treeprocessor):
        if footnotesDiv:
            result = self.footnotes.findFootnotesPlaceholder(root)
            if result:
-                node, isText = result
+                child, parent, isText = result
+                ind = parent.getchildren().index(child)
                if isText:
-                    node.text = None
-                    node.getchildren().insert(0, footnotesDiv)
+                    parent.remove(child)
+                    parent.insert(ind, footnotesDiv)
                else:
-                    child, element = node
-                    ind = element.getchildren().find(child)
-                    element.getchildren().insert(ind + 1, footnotesDiv)
+                    parent.insert(ind + 1, footnotesDiv)
                    child.tail = None
            else:
                root.append(footnotesDiv)

-class FootnotePostprocessor(markdown.postprocessors.Postprocessor):
+class FootnotePostprocessor(Postprocessor):
    """ Replace placeholders with html entities. """
+    def __init__(self, footnotes):
+        self.footnotes = footnotes

    def run(self, text):
-        text = text.replace(FN_BACKLINK_TEXT, "&#8617;")
+        text = text.replace(FN_BACKLINK_TEXT, self.footnotes.getConfig("BACKLINK_TEXT"))
        return text.replace(NBSP_PLACEHOLDER, "&#160;")

 def makeExtension(configs=[]):
--- a/src/calibre/ebooks/markdown/extensions/headerid.py
+++ b/src/calibre/ebooks/markdown/extensions/headerid.py
@ -1,28 +1,28 @@
-#!/usr/bin/python
-
 """
 HeaderID Extension for Python-Markdown
 ======================================

-Adds ability to set HTML IDs for headers.
+Auto-generate id attributes for HTML headers.

 Basic usage:

    >>> import markdown
-    >>> text = "# Some Header # {#some_id}"
+    >>> text = "# Some Header #"
    >>> md = markdown.markdown(text, ['headerid'])
-    >>> md
-    u'<h1 id="some_id">Some Header</h1>'
+    >>> print md
+    <h1 id="some-header">Some Header</h1>

 All header IDs are unique:

    >>> text = '''
    ... #Header
-    ... #Another Header {#header}
-    ... #Third Header {#header}'''
+    ... #Header
+    ... #Header'''
    >>> md = markdown.markdown(text, ['headerid'])
-    >>> md
-    u'<h1 id="header">Header</h1>\\n<h1 id="header_1">Another Header</h1>\\n<h1 id="header_2">Third Header</h1>'
+    >>> print md
+    <h1 id="header">Header</h1>
+    <h1 id="header_1">Header</h1>
+    <h1 id="header_2">Header</h1>

 To fit within a html template's hierarchy, set the header base level:

@ -30,17 +30,26 @@ To fit within a html template's hierarchy, set the header base level:
    ... #Some Header
    ... ## Next Level'''
    >>> md = markdown.markdown(text, ['headerid(level=3)'])
-    >>> md
-    u'<h3 id="some_header">Some Header</h3>\\n<h4 id="next_level">Next Level</h4>'
+    >>> print md
+    <h3 id="some-header">Some Header</h3>
+    <h4 id="next-level">Next Level</h4>
+
+Works with inline markup.
+
+    >>> text = '#Some *Header* with [markup](http://example.com).'
+    >>> md = markdown.markdown(text, ['headerid'])
+    >>> print md
+    <h1 id="some-header-with-markup">Some <em>Header</em> with <a href="http://example.com">markup</a>.</h1>

 Turn off auto generated IDs:

    >>> text = '''
    ... # Some Header
-    ... # Header with ID # { #foo }'''
+    ... # Another Header'''
    >>> md = markdown.markdown(text, ['headerid(forceid=False)'])
-    >>> md
-    u'<h1>Some Header</h1>\\n<h1 id="foo">Header with ID</h1>'
+    >>> print md
+    <h1>Some Header</h1>
+    <h1>Another Header</h1>

 Use with MetaData extension:

@ -49,85 +58,101 @@ Use with MetaData extension:
    ...
    ... # A Header'''
    >>> md = markdown.markdown(text, ['headerid', 'meta'])
-    >>> md
-    u'<h2>A Header</h2>'
+    >>> print md
+    <h2>A Header</h2>

-Copyright 2007-2008 [Waylan Limberg](http://achinghead.com/).
+Copyright 2007-2011 [Waylan Limberg](http://achinghead.com/).

-Project website: <http://www.freewisdom.org/project/python-markdown/HeaderId>
+Project website: <http://packages.python.org/Markdown/extensions/header_id.html>
 Contact: markdown@freewisdom.org

 License: BSD (see ../docs/LICENSE for details) 

 Dependencies:
 * [Python 2.3+](http://python.org)
-* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)
+* [Markdown 2.0+](http://packages.python.org/Markdown/)

 """

-import calibre.ebooks.markdown.markdown as markdown
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..treeprocessors import Treeprocessor
 import re
-from string import ascii_lowercase, digits, punctuation
+import logging
+import unicodedata
+
+logger = logging.getLogger('MARKDOWN')

-ID_CHARS = ascii_lowercase + digits + '-_'
 IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')


-class HeaderIdProcessor(markdown.blockprocessors.BlockProcessor):
-    """ Replacement BlockProcessor for Header IDs. """
+def slugify(value, separator):
+    """ Slugify a string, to make it URL friendly. """
+    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
+    value = re.sub('[^\w\s-]', '', value.decode('ascii')).strip().lower()
+    return re.sub('[%s\s]+' % separator, separator, value)

-    # Detect a header at start of any line in block
-    RE = re.compile(r"""(^|\n)
-                        (?P<level>\#{1,6})  # group('level') = string of hashes
-                        (?P<header>.*?)     # group('header') = Header text
-                        \#*                 # optional closing hashes
-                        (?:[ \t]*\{[ \t]*\#(?P<id>[-_:a-zA-Z0-9]+)[ \t]*\})?
-                        (\n|$)              #  ^^ group('id') = id attribute
-                     """,
-                     re.VERBOSE)

-    IDs = []
-
-    def test(self, parent, block):
-        return bool(self.RE.search(block))
-
-    def run(self, parent, blocks):
-        block = blocks.pop(0)
-        m = self.RE.search(block)
+def unique(id, ids):
+    """ Ensure id is unique in set of ids. Append '_1', '_2'... if not """
+    while id in ids or not id:
+        m = IDCOUNT_RE.match(id)
        if m:
-            before = block[:m.start()] # All lines before header
-            after = block[m.end():]    # All lines after header
-            if before:
-                # As the header was not the first line of the block and the
-                # lines before the header must be parsed first,
-                # recursively parse this lines as a block.
-                self.parser.parseBlocks(parent, [before])
-            # Create header using named groups from RE
-            start_level, force_id = self._get_meta()
-            level = len(m.group('level')) + start_level
-            if level > 6:
-                level = 6
-            h = markdown.etree.SubElement(parent, 'h%d' % level)
-            h.text = m.group('header').strip()
-            if m.group('id'):
-                h.set('id', self._unique_id(m.group('id')))
-            elif force_id:
-                h.set('id', self._create_id(m.group('header').strip()))
-            if after:
-                # Insert remaining lines as first block for future parsing.
-                blocks.insert(0, after)
+            id = '%s_%d'% (m.group(1), int(m.group(2))+1)
        else:
-            # This should never happen, but just in case...
-            print ("We've got a problem header!")
+            id = '%s_%d'% (id, 1)
+    ids.add(id)
+    return id
+
+
+def itertext(elem):
+    """ Loop through all children and return text only. 
+    
+    Reimplements method of same name added to ElementTree in Python 2.7
+    
+    """
+    if elem.text:
+        yield elem.text
+    for e in elem:
+        for s in itertext(e):
+            yield s
+        if e.tail:
+            yield e.tail
+
+
+class HeaderIdTreeprocessor(Treeprocessor):
+    """ Assign IDs to headers. """
+
+    IDs = set()
+
+    def run(self, doc):
+        start_level, force_id = self._get_meta()
+        slugify = self.config['slugify']
+        sep = self.config['separator']
+        for elem in doc.getiterator():
+            if elem.tag in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
+                if force_id:
+                    if "id" in elem.attrib:
+                        id = elem.get('id')
+                    else:
+                        id = slugify(''.join(itertext(elem)), sep)
+                    elem.set('id', unique(id, self.IDs))
+                if start_level:
+                    level = int(elem.tag[-1]) + start_level
+                    if level > 6:
+                        level = 6
+                    elem.tag = 'h%d' % level
+

    def _get_meta(self):
        """ Return meta data suported by this ext as a tuple """
-        level = int(self.config['level'][0]) - 1
-        force = self._str2bool(self.config['forceid'][0])
+        level = int(self.config['level']) - 1
+        force = self._str2bool(self.config['forceid'])
        if hasattr(self.md, 'Meta'):
-            if self.md.Meta.has_key('header_level'):
+            if 'header_level' in self.md.Meta:
                level = int(self.md.Meta['header_level'][0]) - 1
-            if self.md.Meta.has_key('header_forceid'):
+            if 'header_forceid' in self.md.Meta: 
                force = self._str2bool(self.md.Meta['header_forceid'][0])
        return level, force

@ -140,34 +165,15 @@ class HeaderIdProcessor(markdown.blockprocessors.BlockProcessor):
            return True
        return default

-    def _unique_id(self, id):
-        """ Ensure ID is unique. Append '_1', '_2'... if not """
-        while id in self.IDs:
-            m = IDCOUNT_RE.match(id)
-            if m:
-                id = '%s_%d'% (m.group(1), int(m.group(2))+1)
-            else:
-                id = '%s_%d'% (id, 1)
-        self.IDs.append(id)
-        return id

-    def _create_id(self, header):
-        """ Return ID from Header text. """
-        h = ''
-        for c in header.lower().replace(' ', '_'):
-            if c in ID_CHARS:
-                h += c
-            elif c not in punctuation:
-                h += '+'
-        return self._unique_id(h)
-
-
-class HeaderIdExtension (markdown.Extension):
+class HeaderIdExtension(Extension):
    def __init__(self, configs):
        # set defaults
        self.config = {
                'level' : ['1', 'Base level for headers.'],
-                'forceid' : ['True', 'Force all headers to have an id.']
+                'forceid' : ['True', 'Force all headers to have an id.'],
+                'separator' : ['-', 'Word separator.'],
+                'slugify' : [slugify, 'Callable to generate anchors'], 
            }

        for key, value in configs:
@ -175,20 +181,19 @@ class HeaderIdExtension (markdown.Extension):

    def extendMarkdown(self, md, md_globals):
        md.registerExtension(self)
-        self.processor = HeaderIdProcessor(md.parser)
+        self.processor = HeaderIdTreeprocessor()
        self.processor.md = md
-        self.processor.config = self.config
-        # Replace existing hasheader in place.
-        md.parser.blockprocessors['hashheader'] = self.processor
+        self.processor.config = self.getConfigs()
+        if 'attr_list' in md.treeprocessors.keys():
+            # insert after attr_list treeprocessor
+            md.treeprocessors.add('headerid', self.processor, '>attr_list')
+        else:
+            # insert after 'prettify' treeprocessor.
+            md.treeprocessors.add('headerid', self.processor, '>prettify')

    def reset(self):
-        self.processor.IDs = []
+        self.processor.IDs = set()


 def makeExtension(configs=None):
    return HeaderIdExtension(configs=configs)
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
-
--- a/src/calibre/ebooks/markdown/extensions/meta.py
+++ b/src/calibre/ebooks/markdown/extensions/meta.py
@ -1,5 +1,3 @@
-#!usr/bin/python
-
 """
 Meta Data Extension for Python-Markdown
 =======================================
@ -17,37 +15,41 @@ Basic Usage:
    ... The body. This is paragraph one.
    ... '''
    >>> md = markdown.Markdown(['meta'])
-    >>> md.convert(text)
-    u'<p>The body. This is paragraph one.</p>'
-    >>> md.Meta
+    >>> print md.convert(text)
+    <p>The body. This is paragraph one.</p>
+    >>> print md.Meta
    {u'blank_data': [u''], u'author': [u'Waylan Limberg', u'John Doe'], u'title': [u'A Test Doc.']}

 Make sure text without Meta Data still works (markdown < 1.6b returns a <p>).

    >>> text = '    Some Code - not extra lines of meta data.'
    >>> md = markdown.Markdown(['meta'])
-    >>> md.convert(text)
-    u'<pre><code>Some Code - not extra lines of meta data.\\n</code></pre>'
+    >>> print md.convert(text)
+    <pre><code>Some Code - not extra lines of meta data.
+    </code></pre>
    >>> md.Meta
    {}

 Copyright 2007-2008 [Waylan Limberg](http://achinghead.com).

-Project website: <http://www.freewisdom.org/project/python-markdown/Meta-Data>
+Project website: <http://packages.python.org/Markdown/meta_data.html>
 Contact: markdown@freewisdom.org

-License: BSD (see ../docs/LICENSE for details)
+License: BSD (see ../LICENSE.md for details)

 """

+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..preprocessors import Preprocessor
 import re
-import calibre.ebooks.markdown.markdown as markdown

 # Global Vars
 META_RE = re.compile(r'^[ ]{0,3}(?P<key>[A-Za-z0-9_-]+):\s*(?P<value>.*)')
 META_MORE_RE = re.compile(r'^[ ]{4,}(?P<value>.*)')

-class MetaExtension (markdown.Extension):
+class MetaExtension (Extension):
    """ Meta-Data extension for Python-Markdown. """

    def extendMarkdown(self, md, md_globals):
@ -56,7 +58,7 @@ class MetaExtension (markdown.Extension):
        md.preprocessors.add("meta", MetaPreprocessor(md), "_begin")


-class MetaPreprocessor(markdown.preprocessors.Preprocessor):
+class MetaPreprocessor(Preprocessor):
    """ Get Meta-Data. """

    def run(self, lines):
@ -70,7 +72,11 @@ class MetaPreprocessor(markdown.preprocessors.Preprocessor):
            m1 = META_RE.match(line)
            if m1:
                key = m1.group('key').lower().strip()
-                meta[key] = [m1.group('value').strip()]
+                value = m1.group('value').strip()
+                try:
+                    meta[key].append(value)
+                except KeyError:
+                    meta[key] = [value]
            else:
                m2 = META_MORE_RE.match(line)
                if m2 and key:
@ -85,7 +91,3 @@ class MetaPreprocessor(markdown.preprocessors.Preprocessor):

 def makeExtension(configs={}):
    return MetaExtension(configs=configs)
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
--- a/src/calibre/ebooks/markdown/extensions/nl2br.py
+++ b/src/calibre/ebooks/markdown/extensions/nl2br.py
@ -0,0 +1,38 @@
+"""
+NL2BR Extension
+===============
+
+A Python-Markdown extension to treat newlines as hard breaks; like
+GitHub-flavored Markdown does.
+
+Usage:
+
+    >>> import markdown
+    >>> print markdown.markdown('line 1\\nline 2', extensions=['nl2br'])
+    <p>line 1<br />
+    line 2</p>
+
+Copyright 2011 [Brian Neal](http://deathofagremmie.com/)
+
+Dependencies:
+* [Python 2.4+](http://python.org)
+* [Markdown 2.1+](http://packages.python.org/Markdown/)
+
+"""
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..inlinepatterns import SubstituteTagPattern
+
+BR_RE = r'\n'
+
+class Nl2BrExtension(Extension):
+
+    def extendMarkdown(self, md, md_globals):
+        br_tag = SubstituteTagPattern(BR_RE, 'br')
+        md.inlinePatterns.add('nl', br_tag, '_end')
+
+
+def makeExtension(configs=None):
+    return Nl2BrExtension(configs)
--- a/src/calibre/ebooks/markdown/extensions/rss.py
+++ b/src/calibre/ebooks/markdown/extensions/rss.py
@ -1,114 +0,0 @@
-import calibre.ebooks.markdown.markdown as markdown
-from calibre.ebooks.markdown.markdown import etree
-
-DEFAULT_URL = "http://www.freewisdom.org/projects/python-markdown/"
-DEFAULT_CREATOR = "Yuri Takhteyev"
-DEFAULT_TITLE = "Markdown in Python"
-GENERATOR = "http://www.freewisdom.org/projects/python-markdown/markdown2rss"
-
-month_map = { "Jan" : "01",
-              "Feb" : "02",
-              "March" : "03",
-              "April" : "04",
-              "May" : "05",
-              "June" : "06",
-              "July" : "07",
-              "August" : "08",
-              "September" : "09",
-              "October" : "10",
-              "November" : "11",
-              "December" : "12" }
-
-def get_time(heading):
-
-    heading = heading.split("-")[0]
-    heading = heading.strip().replace(",", " ").replace(".", " ")
-
-    month, date, year = heading.split()
-    month = month_map[month]
-
-    return rdftime(" ".join((month, date, year, "12:00:00 AM")))
-
-def rdftime(time):
-
-    time = time.replace(":", " ")
-    time = time.replace("/", " ")
-    time = time.split()
-    return "%s-%s-%sT%s:%s:%s-08:00" % (time[0], time[1], time[2],
-                                        time[3], time[4], time[5])
-
-
-def get_date(text):
-    return "date"
-
-class RssExtension (markdown.Extension):
-
-    def extendMarkdown(self, md, md_globals):
-
-        self.config = { 'URL' : [DEFAULT_URL, "Main URL"],
-                        'CREATOR' : [DEFAULT_CREATOR, "Feed creator's name"],
-                        'TITLE' : [DEFAULT_TITLE, "Feed title"] }
-
-        md.xml_mode = True
-        
-        # Insert a tree-processor that would actually add the title tag
-        treeprocessor = RssTreeProcessor(md)
-        treeprocessor.ext = self
-        md.treeprocessors['rss'] = treeprocessor
-        md.stripTopLevelTags = 0
-        md.docType = '<?xml version="1.0" encoding="utf-8"?>\n'
-
-class RssTreeProcessor(markdown.treeprocessors.Treeprocessor):
-
-    def run (self, root):
-
-        rss = etree.Element("rss")
-        rss.set("version", "2.0")
-
-        channel = etree.SubElement(rss, "channel")
-
-        for tag, text in (("title", self.ext.getConfig("TITLE")),
-                          ("link", self.ext.getConfig("URL")),
-                          ("description", None)):
-            
-            element = etree.SubElement(channel, tag)
-            element.text = text
-
-        for child in root:
-
-            if child.tag in ["h1", "h2", "h3", "h4", "h5"]:
-      
-                heading = child.text.strip()
-                item = etree.SubElement(channel, "item")
-                link = etree.SubElement(item, "link")
-                link.text = self.ext.getConfig("URL")
-                title = etree.SubElement(item, "title")
-                title.text = heading
-
-                guid = ''.join([x for x in heading if x.isalnum()])
-                guidElem = etree.SubElement(item, "guid")
-                guidElem.text = guid
-                guidElem.set("isPermaLink", "false")
-
-            elif child.tag in ["p"]:
-                try:
-                    description = etree.SubElement(item, "description")
-                except UnboundLocalError:
-                    # Item not defined - moving on
-                    pass
-                else:
-                    if len(child):
-                        content = "\n".join([etree.tostring(node)
-                                             for node in child])
-                    else:
-                        content = child.text
-                    pholder = self.markdown.htmlStash.store(
-                                                "<![CDATA[ %s]]>" % content)
-                    description.text = pholder
-    
-        return rss
-
-
-def makeExtension(configs):
-
-    return RssExtension(configs)
--- a/src/calibre/ebooks/markdown/extensions/sane_lists.py
+++ b/src/calibre/ebooks/markdown/extensions/sane_lists.py
@ -0,0 +1,51 @@
+"""
+Sane List Extension for Python-Markdown
+=======================================
+
+Modify the behavior of Lists in Python-Markdown to act in a sane manner.
+
+In standard Markdown syntax, the following would constitute a single
+ordered list. However, with this extension, the output would include
+two lists, the first an ordered list and the second an unordered list.
+
+    1. ordered
+    2. list
+
+    * unordered
+    * list
+
+Copyright 2011 - [Waylan Limberg](http://achinghead.com)
+
+"""
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..blockprocessors import OListProcessor, UListProcessor
+import re
+
+
+class SaneOListProcessor(OListProcessor):
+    
+    CHILD_RE = re.compile(r'^[ ]{0,3}((\d+\.))[ ]+(.*)')
+    SIBLING_TAGS = ['ol']
+
+
+class SaneUListProcessor(UListProcessor):
+    
+    CHILD_RE = re.compile(r'^[ ]{0,3}(([*+-]))[ ]+(.*)')
+    SIBLING_TAGS = ['ul']
+
+
+class SaneListExtension(Extension):
+    """ Add sane lists to Markdown. """
+
+    def extendMarkdown(self, md, md_globals):
+        """ Override existing Processors. """
+        md.parser.blockprocessors['olist'] = SaneOListProcessor(md.parser)
+        md.parser.blockprocessors['ulist'] = SaneUListProcessor(md.parser)
+
+
+def makeExtension(configs={}):
+    return SaneListExtension(configs=configs)
+
--- a/src/calibre/ebooks/markdown/extensions/smart_strong.py
+++ b/src/calibre/ebooks/markdown/extensions/smart_strong.py
@ -0,0 +1,42 @@
+'''
+Smart_Strong Extension for Python-Markdown
+==========================================
+
+This extension adds smarter handling of double underscores within words.
+
+Simple Usage:
+
+    >>> import markdown
+    >>> print markdown.markdown('Text with double__underscore__words.',
+    ...                   extensions=['smart_strong'])
+    <p>Text with double__underscore__words.</p>
+    >>> print markdown.markdown('__Strong__ still works.',
+    ...                   extensions=['smart_strong'])
+    <p><strong>Strong</strong> still works.</p>
+    >>> print markdown.markdown('__this__works__too__.',
+    ...                   extensions=['smart_strong'])
+    <p><strong>this__works__too</strong>.</p>
+
+Copyright 2011
+[Waylan Limberg](http://achinghead.com)
+
+'''
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..inlinepatterns import SimpleTagPattern
+
+SMART_STRONG_RE = r'(?<!\w)(_{2})(?!_)(.+?)(?<!_)\2(?!\w)'
+STRONG_RE = r'(\*{2})(.+?)\2'
+
+class SmartEmphasisExtension(Extension):
+    """ Add smart_strong extension to Markdown class."""
+
+    def extendMarkdown(self, md, md_globals):
+        """ Modify inline patterns. """
+        md.inlinePatterns['strong'] = SimpleTagPattern(STRONG_RE, 'strong')
+        md.inlinePatterns.add('strong2', SimpleTagPattern(SMART_STRONG_RE, 'strong'), '>emphasis2')
+
+def makeExtension(configs={}):
+    return SmartEmphasisExtension(configs=dict(configs))
--- a/src/calibre/ebooks/markdown/extensions/tables.py
+++ b/src/calibre/ebooks/markdown/extensions/tables.py
@ -1,4 +1,3 @@
-#!/usr/bin/env Python
 """
 Tables Extension for Python-Markdown
 ====================================
@ -14,31 +13,35 @@ A simple example:

 Copyright 2009 - [Waylan Limberg](http://achinghead.com)
 """
-import calibre.ebooks.markdown.markdown as markdown
-from calibre.ebooks.markdown.markdown import etree

+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..blockprocessors import BlockProcessor
+from ..util import etree

-class TableProcessor(markdown.blockprocessors.BlockProcessor):
+class TableProcessor(BlockProcessor):
    """ Process Tables. """

    def test(self, parent, block):
        rows = block.split('\n')
        return (len(rows) > 2 and '|' in rows[0] and 
                '|' in rows[1] and '-' in rows[1] and 
-                rows[1][0] in ['|', ':', '-'])
+                rows[1].strip()[0] in ['|', ':', '-'])

    def run(self, parent, blocks):
        """ Parse a table block and build table. """
        block = blocks.pop(0).split('\n')
-        header = block[:2]
+        header = block[0].strip()
+        seperator = block[1].strip()
        rows = block[2:]
        # Get format type (bordered by pipes or not)
        border = False
-        if header[0].startswith('|'):
+        if header.startswith('|'):
            border = True
        # Get alignment of columns
        align = []
-        for c in self._split_row(header[1], border):
+        for c in self._split_row(seperator, border):
            if c.startswith(':') and c.endswith(':'):
                align.append('center')
            elif c.startswith(':'):
@ -50,10 +53,10 @@ class TableProcessor(markdown.blockprocessors.BlockProcessor):
        # Build table
        table = etree.SubElement(parent, 'table')
        thead = etree.SubElement(table, 'thead')
-        self._build_row(header[0], thead, align, border)
+        self._build_row(header, thead, align, border)
        tbody = etree.SubElement(table, 'tbody')
        for row in rows:
-            self._build_row(row, tbody, align, border)
+            self._build_row(row.strip(), tbody, align, border)

    def _build_row(self, row, parent, align, border):
        """ Given a row of text, build table cells. """
@ -83,7 +86,7 @@ class TableProcessor(markdown.blockprocessors.BlockProcessor):
        return row.split('|')


-class TableExtension(markdown.Extension):
+class TableExtension(Extension):
    """ Add tables to Markdown. """

    def extendMarkdown(self, md, md_globals):
--- a/src/calibre/ebooks/markdown/extensions/toc.py
+++ b/src/calibre/ebooks/markdown/extensions/toc.py
@ -5,43 +5,141 @@ Table of Contents Extension for Python-Markdown
 (c) 2008 [Jack Miller](http://codezen.org)

 Dependencies:
-* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)
+* [Markdown 2.1+](http://packages.python.org/Markdown/)

 """
-import calibre.ebooks.markdown.markdown as markdown
-from calibre.ebooks.markdown.markdown import etree
+
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..treeprocessors import Treeprocessor
+from ..util import etree
+from .headerid import slugify, unique, itertext
 import re

-class TocTreeprocessor(markdown.treeprocessors.Treeprocessor):
+
+def order_toc_list(toc_list):
+    """Given an unsorted list with errors and skips, return a nested one.
+    [{'level': 1}, {'level': 2}]
+    =>
+    [{'level': 1, 'children': [{'level': 2, 'children': []}]}]
+    
+    A wrong list is also converted:
+    [{'level': 2}, {'level': 1}]
+    =>
+    [{'level': 2, 'children': []}, {'level': 1, 'children': []}]
+    """
+    
+    def build_correct(remaining_list, prev_elements=[{'level': 1000}]):
+        
+        if not remaining_list:
+            return [], []
+        
+        current = remaining_list.pop(0)
+        if not 'children' in current.keys():
+            current['children'] = []
+        
+        if not prev_elements:
+            # This happens for instance with [8, 1, 1], ie. when some
+            # header level is outside a scope. We treat it as a
+            # top-level
+            next_elements, children = build_correct(remaining_list, [current])
+            current['children'].append(children)
+            return [current] + next_elements, []
+        
+        prev_element = prev_elements.pop()
+        children = []
+        next_elements = []
+        # Is current part of the child list or next list?
+        if current['level'] > prev_element['level']:
+            #print "%d is a child of %d" % (current['level'], prev_element['level'])
+            prev_elements.append(prev_element)
+            prev_elements.append(current)
+            prev_element['children'].append(current)
+            next_elements2, children2 = build_correct(remaining_list, prev_elements)
+            children += children2
+            next_elements += next_elements2
+        else:
+            #print "%d is ancestor of %d" % (current['level'], prev_element['level'])
+            if not prev_elements:
+                #print "No previous elements, so appending to the next set"
+                next_elements.append(current)
+                prev_elements = [current]
+                next_elements2, children2 = build_correct(remaining_list, prev_elements)
+                current['children'].extend(children2)
+            else:
+                #print "Previous elements, comparing to those first"
+                remaining_list.insert(0, current)
+                next_elements2, children2 = build_correct(remaining_list, prev_elements)
+                children.extend(children2)
+            next_elements += next_elements2
+        
+        return next_elements, children
+    
+    ordered_list, __ = build_correct(toc_list)
+    return ordered_list
+
+
+class TocTreeprocessor(Treeprocessor):
+    
    # Iterator wrapper to get parent and child all at once
    def iterparent(self, root):
        for parent in root.getiterator():
            for child in parent:
                yield parent, child
    
-    def run(self, doc):
-        div = etree.Element("div")
-        div.attrib["class"] = "toc"
-        last_li = None
+    def add_anchor(self, c, elem_id): #@ReservedAssignment
+        if self.use_anchors:
+            anchor = etree.Element("a")
+            anchor.text = c.text
+            anchor.attrib["href"] = "#" + elem_id
+            anchor.attrib["class"] = "toclink"
+            c.text = ""
+            for elem in c.getchildren():
+                anchor.append(elem)
+                c.remove(elem)
+            c.append(anchor)
    
+    def build_toc_etree(self, div, toc_list):
        # Add title to the div
-        if self.config["title"][0]:
+        if self.config["title"]:
            header = etree.SubElement(div, "span")
            header.attrib["class"] = "toctitle"
-            header.text = self.config["title"][0]
+            header.text = self.config["title"]

-        level = 0
-        list_stack=[div]
+        def build_etree_ul(toc_list, parent):
+            ul = etree.SubElement(parent, "ul")
+            for item in toc_list:
+                # List item link, to be inserted into the toc div
+                li = etree.SubElement(ul, "li")
+                link = etree.SubElement(li, "a")
+                link.text = item.get('name', '')
+                link.attrib["href"] = '#' + item.get('id', '')
+                if item['children']:
+                    build_etree_ul(item['children'], li)
+            return ul
+        
+        return build_etree_ul(toc_list, div)
+        
+    def run(self, doc):
+
+        div = etree.Element("div")
+        div.attrib["class"] = "toc"
        header_rgx = re.compile("[Hh][123456]")
        
+        self.use_anchors = self.config["anchorlink"] in [1, '1', True, 'True', 'true']
+        
        # Get a list of id attributes
-        used_ids = []
+        used_ids = set()
        for c in doc.getiterator():
            if "id" in c.attrib:
-                used_ids.append(c.attrib["id"])
+                used_ids.add(c.attrib["id"])

+        toc_list = []
+        marker_found = False
        for (p, c) in self.iterparent(doc):
-            if not c.text:
+            text = ''.join(itertext(c)).strip()
+            if not text:
                continue

            # To keep the output from screwing up the
@ -50,69 +148,54 @@ class TocTreeprocessor(markdown.treeprocessors.Treeprocessor):
            # We do not allow the marker inside a header as that
            # would causes an enless loop of placing a new TOC 
            # inside previously generated TOC.
-
-            if c.text.find(self.config["marker"][0]) > -1 and not header_rgx.match(c.tag):
+            if c.text and c.text.strip() == self.config["marker"] and \
+               not header_rgx.match(c.tag) and c.tag not in ['pre', 'code']:
                for i in range(len(p)):
                    if p[i] == c:
                        p[i] = div
                        break
+                marker_found = True
                            
            if header_rgx.match(c.tag):
-                tag_level = int(c.tag[-1])
-                
-                # Regardless of how many levels we jumped
-                # only one list should be created, since
-                # empty lists containing lists are illegal.
-    
-                if tag_level < level:
-                    list_stack.pop()
-                    level = tag_level
-
-                if tag_level > level:
-                    newlist = etree.Element("ul")
-                    if last_li:
-                        last_li.append(newlist)
-                    else:
-                        list_stack[-1].append(newlist)
-                    list_stack.append(newlist)
-                    level = tag_level
                
                # Do not override pre-existing ids 
                if not "id" in c.attrib:
-                    id = self.config["slugify"][0](c.text)
-                    if id in used_ids:
-                        ctr = 1
-                        while "%s_%d" % (id, ctr) in used_ids:
-                            ctr += 1
-                        id = "%s_%d" % (id, ctr)
-                    used_ids.append(id)
-                    c.attrib["id"] = id
+                    elem_id = unique(self.config["slugify"](text, '-'), used_ids)
+                    c.attrib["id"] = elem_id
                else:
-                    id = c.attrib["id"]
+                    elem_id = c.attrib["id"]

-                # List item link, to be inserted into the toc div
-                last_li = etree.Element("li")
-                link = etree.SubElement(last_li, "a")
-                link.text = c.text
-                link.attrib["href"] = '#' + id
+                tag_level = int(c.tag[-1])
                
-                if int(self.config["anchorlink"][0]):
-                    anchor = etree.SubElement(c, "a")
-                    anchor.text = c.text
-                    anchor.attrib["href"] = "#" + id
-                    anchor.attrib["class"] = "toclink"
-                    c.text = ""
+                toc_list.append({'level': tag_level,
+                    'id': elem_id,
+                    'name': text})
                
-                list_stack[-1].append(last_li)
+                self.add_anchor(c, elem_id)
                
-class TocExtension(markdown.Extension):
-    def __init__(self, configs):
+        toc_list_nested = order_toc_list(toc_list)
+        self.build_toc_etree(div, toc_list_nested)
+        prettify = self.markdown.treeprocessors.get('prettify')
+        if prettify: prettify.run(div)
+        if not marker_found:
+            # serialize and attach to markdown instance.
+            toc = self.markdown.serializer(div)
+            for pp in self.markdown.postprocessors.values():
+                toc = pp.run(toc)
+            self.markdown.toc = toc
+
+
+class TocExtension(Extension):
+    
+    TreeProcessorClass = TocTreeprocessor
+    
+    def __init__(self, configs=[]):
        self.config = { "marker" : ["[TOC]", 
                            "Text to find and replace with Table of Contents -"
                            "Defaults to \"[TOC]\""],
-                        "slugify" : [self.slugify,
+                        "slugify" : [slugify,
                            "Function to generate anchors based on header text-"
-                            "Defaults to a built in slugify function."],
+                            "Defaults to the headerid ext's slugify function."],
                        "title" : [None,
                            "Title to insert into TOC <div> - "
                            "Defaults to None"],
@ -123,18 +206,16 @@ class TocExtension(markdown.Extension):
        for key, value in configs:
            self.setConfig(key, value)

-    # This is exactly the same as Django's slugify
-    def slugify(self, value):
-        """ Slugify a string, to make it URL friendly. """
-        import unicodedata
-        value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
-        value = unicode(re.sub('[^\w\s-]', '', value).strip().lower())
-        return re.sub('[-\s]+','-',value)
-
    def extendMarkdown(self, md, md_globals):
-        tocext = TocTreeprocessor(md)
-        tocext.config = self.config
-        md.treeprocessors.add("toc", tocext, "_begin")
+        tocext = self.TreeProcessorClass(md)
+        tocext.config = self.getConfigs()
+        # Headerid ext is set to '>prettify'. With this set to '_end',
+        # it should always come after headerid ext (and honor ids assigned 
+        # by the header id extension) if both are used. Same goes for 
+        # attr_list extension. This must come last because we don't want
+        # to redefine ids after toc is created. But we do want toc prettified.
+        md.treeprocessors.add("toc", tocext, "_end")
+

 def makeExtension(configs={}):
    return TocExtension(configs=configs)
--- a/src/calibre/ebooks/markdown/extensions/wikilinks.py
+++ b/src/calibre/ebooks/markdown/extensions/wikilinks.py
@ -1,5 +1,3 @@
-#!/usr/bin/env python
-
 '''
 WikiLinks Extension for Python-Markdown
 ======================================
@ -11,22 +9,22 @@ Basic usage:
    >>> import markdown
    >>> text = "Some text with a [[WikiLink]]."
    >>> html = markdown.markdown(text, ['wikilinks'])
-    >>> html
-    u'<p>Some text with a <a class="wikilink" href="/WikiLink/">WikiLink</a>.</p>'
+    >>> print html
+    <p>Some text with a <a class="wikilink" href="/WikiLink/">WikiLink</a>.</p>

 Whitespace behavior:

-    >>> markdown.markdown('[[ foo bar_baz ]]', ['wikilinks'])
-    u'<p><a class="wikilink" href="/foo_bar_baz/">foo bar_baz</a></p>'
-    >>> markdown.markdown('foo [[ ]] bar', ['wikilinks'])
-    u'<p>foo  bar</p>'
+    >>> print markdown.markdown('[[ foo bar_baz ]]', ['wikilinks'])
+    <p><a class="wikilink" href="/foo_bar_baz/">foo bar_baz</a></p>
+    >>> print markdown.markdown('foo [[ ]] bar', ['wikilinks'])
+    <p>foo  bar</p>

 To define custom settings the simple way:

-    >>> markdown.markdown(text, 
+    >>> print markdown.markdown(text, 
    ...     ['wikilinks(base_url=/wiki/,end_url=.html,html_class=foo)']
    ... )
-    u'<p>Some text with a <a class="foo" href="/wiki/WikiLink.html">WikiLink</a>.</p>'
+    <p>Some text with a <a class="foo" href="/wiki/WikiLink.html">WikiLink</a>.</p>
    
 Custom settings the complex way:

@ -37,8 +35,8 @@ Custom settings the complex way:
    ...                                 ('end_url', '.html'),
    ...                                 ('html_class', '') ]},
    ...     safe_mode = True)
-    >>> md.convert(text)
-    u'<p>Some text with a <a href="http://example.com/WikiLink.html">WikiLink</a>.</p>'
+    >>> print md.convert(text)
+    <p>Some text with a <a href="http://example.com/WikiLink.html">WikiLink</a>.</p>

 Use MetaData with mdx_meta.py (Note the blank html_class in MetaData):

@ -48,13 +46,13 @@ Use MetaData with mdx_meta.py (Note the blank html_class in MetaData):
    ...
    ... Some text with a [[WikiLink]]."""
    >>> md = markdown.Markdown(extensions=['meta', 'wikilinks'])
-    >>> md.convert(text)
-    u'<p>Some text with a <a href="http://example.com/WikiLink.html">WikiLink</a>.</p>'
+    >>> print md.convert(text)
+    <p>Some text with a <a href="http://example.com/WikiLink.html">WikiLink</a>.</p>

 MetaData should not carry over to next document:

-    >>> md.convert("No [[MetaData]] here.")
-    u'<p>No <a class="wikilink" href="/MetaData/">MetaData</a> here.</p>'
+    >>> print md.convert("No [[MetaData]] here.")
+    <p>No <a class="wikilink" href="/MetaData/">MetaData</a> here.</p>

 Define a custom URL builder:

@ -62,8 +60,8 @@ Define a custom URL builder:
    ...     return '/bar/'
    >>> md = markdown.Markdown(extensions=['wikilinks'], 
    ...         extension_configs={'wikilinks' : [('build_url', my_url_builder)]})
-    >>> md.convert('[[foo]]')
-    u'<p><a class="wikilink" href="/bar/">foo</a></p>'
+    >>> print md.convert('[[foo]]')
+    <p><a class="wikilink" href="/bar/">foo</a></p>

 From the command line:

@ -75,10 +73,14 @@ License: [BSD](http://www.opensource.org/licenses/bsd-license.php)

 Dependencies:
 * [Python 2.3+](http://python.org)
-* [Markdown 2.0+](http://www.freewisdom.org/projects/python-markdown/)
+* [Markdown 2.0+](http://packages.python.org/Markdown/)
 '''

-import calibre.ebooks.markdown.markdown as markdown
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import Extension
+from ..inlinepatterns import Pattern
+from ..util import etree
 import re

 def build_url(label, base, end):
@ -87,7 +89,7 @@ def build_url(label, base, end):
    return '%s%s%s'% (base, clean_label, end)


-class WikiLinkExtension(markdown.Extension):
+class WikiLinkExtension(Extension):
    def __init__(self, configs):
        # set extension defaults
        self.config = {
@ -105,23 +107,23 @@ class WikiLinkExtension(markdown.Extension):
        self.md = md
    
        # append to end of inline patterns
-        WIKILINK_RE = r'\[\[([A-Za-z0-9_ -]+)\]\]'
-        wikilinkPattern = WikiLinks(WIKILINK_RE, self.config)
+        WIKILINK_RE = r'\[\[([\w0-9_ -]+)\]\]'
+        wikilinkPattern = WikiLinks(WIKILINK_RE, self.getConfigs())
        wikilinkPattern.md = md
        md.inlinePatterns.add('wikilink', wikilinkPattern, "<not_strong")


-class WikiLinks(markdown.inlinepatterns.Pattern):
+class WikiLinks(Pattern):
    def __init__(self, pattern, config):
-        markdown.inlinepatterns.Pattern.__init__(self, pattern)
+        super(WikiLinks, self).__init__(pattern)
        self.config = config
  
    def handleMatch(self, m):
        if m.group(2).strip():
            base_url, end_url, html_class = self._getMeta()
            label = m.group(2).strip()
-            url = self.config['build_url'][0](label, base_url, end_url)
-            a = markdown.etree.Element('a')
+            url = self.config['build_url'](label, base_url, end_url)
+            a = etree.Element('a')
            a.text = label 
            a.set('href', url)
            if html_class:
@ -132,24 +134,18 @@ class WikiLinks(markdown.inlinepatterns.Pattern):

    def _getMeta(self):
        """ Return meta data or config data. """
-        base_url = self.config['base_url'][0]
-        end_url = self.config['end_url'][0]
-        html_class = self.config['html_class'][0]
+        base_url = self.config['base_url']
+        end_url = self.config['end_url']
+        html_class = self.config['html_class']
        if hasattr(self.md, 'Meta'):
-            if self.md.Meta.has_key('wiki_base_url'):
+            if 'wiki_base_url' in self.md.Meta:
                base_url = self.md.Meta['wiki_base_url'][0]
-            if self.md.Meta.has_key('wiki_end_url'):
+            if 'wiki_end_url' in self.md.Meta:
                end_url = self.md.Meta['wiki_end_url'][0]
-            if self.md.Meta.has_key('wiki_html_class'):
+            if 'wiki_html_class' in self.md.Meta:
                html_class = self.md.Meta['wiki_html_class'][0]
        return base_url, end_url, html_class
    

 def makeExtension(configs=None) :
    return WikiLinkExtension(configs=configs)
-
-
-if __name__ == "__main__":
-    import doctest
-    doctest.testmod()
-
--- a/src/calibre/ebooks/markdown/inlinepatterns.py
+++ b/src/calibre/ebooks/markdown/inlinepatterns.py
@ -41,15 +41,48 @@ So, we apply the expressions in the following order:
 * finally we apply strong and emphasis
 """

-import markdown
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import util
+from . import odict
 import re
-from urlparse import urlparse, urlunparse
-import sys
-if sys.version >= "3.0":
-    from html import entities as htmlentitydefs
-    htmlentitydefs
-else:
-    import htmlentitydefs
+try:
+    from urllib.parse import urlparse, urlunparse
+except ImportError:
+    from urlparse import urlparse, urlunparse
+try:
+    from html import entities
+except ImportError:
+    import htmlentitydefs as entities
+
+
+def build_inlinepatterns(md_instance, **kwargs):
+    """ Build the default set of inline patterns for Markdown.
+
+    Returns an `odict.OrderedDict` mapping pattern names to pattern
+    instances, inserted in the order written below. `md_instance` is handed
+    to the patterns that take a markdown instance and is consulted for its
+    `safeMode` and `smart_emphasis` settings. `**kwargs` is accepted but not
+    used in this function.
+    """
+    # NOTE(review): insertion order presumably determines the order in which
+    # the patterns are applied (see the module docstring) - do not reorder.
+    inlinePatterns = odict.OrderedDict()
+    inlinePatterns["backtick"] = BacktickPattern(BACKTICK_RE)
+    inlinePatterns["escape"] = EscapePattern(ESCAPE_RE, md_instance)
+    inlinePatterns["reference"] = ReferencePattern(REFERENCE_RE, md_instance)
+    inlinePatterns["link"] = LinkPattern(LINK_RE, md_instance)
+    inlinePatterns["image_link"] = ImagePattern(IMAGE_LINK_RE, md_instance)
+    inlinePatterns["image_reference"] = \
+            ImageReferencePattern(IMAGE_REFERENCE_RE, md_instance)
+    # Short references ("[Google]") reuse ReferencePattern with SHORT_REF_RE.
+    inlinePatterns["short_reference"] = \
+            ReferencePattern(SHORT_REF_RE, md_instance)
+    inlinePatterns["autolink"] = AutolinkPattern(AUTOLINK_RE, md_instance)
+    inlinePatterns["automail"] = AutomailPattern(AUTOMAIL_RE, md_instance)
+    inlinePatterns["linebreak"] = SubstituteTagPattern(LINE_BREAK_RE, 'br')
+    # The raw inline HTML pattern is left out when safe mode is 'escape'.
+    if md_instance.safeMode != 'escape':
+        inlinePatterns["html"] = HtmlPattern(HTML_RE, md_instance)
+    inlinePatterns["entity"] = HtmlPattern(ENTITY_RE, md_instance)
+    inlinePatterns["not_strong"] = SimpleTextPattern(NOT_STRONG_RE)
+    inlinePatterns["strong_em"] = DoubleTagPattern(STRONG_EM_RE, 'strong,em')
+    inlinePatterns["strong"] = SimpleTagPattern(STRONG_RE, 'strong')
+    inlinePatterns["emphasis"] = SimpleTagPattern(EMPHASIS_RE, 'em')
+    # Underscore emphasis: the regex used depends on md_instance.smart_emphasis.
+    if md_instance.smart_emphasis:
+        inlinePatterns["emphasis2"] = SimpleTagPattern(SMART_EMPHASIS_RE, 'em')
+    else:
+        inlinePatterns["emphasis2"] = SimpleTagPattern(EMPHASIS_2_RE, 'em')
+    return inlinePatterns

 """
 The actual regular expressions for patterns
@ -65,31 +98,27 @@ NOIMG = r'(?<!\!)'

 BACKTICK_RE = r'(?<!\\)(`+)(.+?)(?<!`)\2(?!`)' # `e=f()` or ``e=f("`")``
 ESCAPE_RE = r'\\(.)'                             # \<
-EMPHASIS_RE = r'(\*)([^\*]*)\2'                    # *emphasis*
-STRONG_RE = r'(\*{2}|_{2})(.*?)\2'                      # **strong**
-STRONG_EM_RE = r'(\*{3}|_{3})(.*?)\2'            # ***strong***
-
-if markdown.SMART_EMPHASIS:
-    EMPHASIS_2_RE = r'(?<!\S)(_)(\S.*?)\2'        # _emphasis_
-else:
-    EMPHASIS_2_RE = r'(_)(.*?)\2'                 # _emphasis_
-
+EMPHASIS_RE = r'(\*)([^\*]+)\2'                    # *emphasis*
+STRONG_RE = r'(\*{2}|_{2})(.+?)\2'                      # **strong**
+STRONG_EM_RE = r'(\*{3}|_{3})(.+?)\2'            # ***strong***
+SMART_EMPHASIS_RE = r'(?<!\w)(_)(?!_)(.+?)(?<!_)\2(?!\w)'  # _smart_emphasis_
+EMPHASIS_2_RE = r'(_)(.+?)\2'                 # _emphasis_
 LINK_RE = NOIMG + BRK + \
-r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*)\12)?\)'''
-# [text](url) or [text](<url>)
+r'''\(\s*(<.*?>|((?:(?:\(.*?\))|[^\(\)]))*?)\s*((['"])(.*?)\12\s*)?\)'''
+# [text](url) or [text](<url>) or [text](url "title")

 IMAGE_LINK_RE = r'\!' + BRK + r'\s*\((<.*?>|([^\)]*))\)'
 # ![alttxt](http://x.com/) or ![alttxt](<http://x.com/>)
-REFERENCE_RE = NOIMG + BRK+ r'\s*\[([^\]]*)\]'           # [Google][3]
-IMAGE_REFERENCE_RE = r'\!' + BRK + '\s*\[([^\]]*)\]' # ![alt text][2]
-NOT_STRONG_RE = r'( \* )'                        # stand-alone * or _
-AUTOLINK_RE = r'<((?:f|ht)tps?://[^>]*)>'        # <http://www.123.com>
+REFERENCE_RE = NOIMG + BRK+ r'\s?\[([^\]]*)\]'           # [Google][3]
+SHORT_REF_RE = NOIMG + r'\[([^\]]+)\]'                   # [Google]
+IMAGE_REFERENCE_RE = r'\!' + BRK + r'\s?\[([^\]]*)\]' # ![alt text][2]
+NOT_STRONG_RE = r'((^| )(\*|_)( |$))'                        # stand-alone * or _
+AUTOLINK_RE = r'<((?:[Ff]|[Hh][Tt])[Tt][Pp][Ss]?://[^>]*)>' # <http://www.123.com>
 AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>'               # <me@example.com>

 HTML_RE = r'(\<([a-zA-Z/][^\>]*?|\!--.*?--)\>)'               # <...>
 ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)'               # &amp;
 LINE_BREAK_RE = r'  \n'                     # two spaces at end of line
-LINE_BREAK_2_RE = r'  $'                    # two spaces at end of text


 def dequote(string):
@ -114,10 +143,10 @@ The pattern classes
 -----------------------------------------------------------------------------
 """

-class Pattern:
+class Pattern(object):
    """Base class that inline patterns subclass. """

-    def __init__ (self, pattern, markdown_instance=None):
+    def __init__(self, pattern, markdown_instance=None):
        """
        Create an instant of an inline pattern.

@ -127,14 +156,15 @@ class Pattern:

        """
        self.pattern = pattern
-        self.compiled_re = re.compile("^(.*?)%s(.*?)$" % pattern, re.DOTALL)
+        self.compiled_re = re.compile("^(.*?)%s(.*?)$" % pattern, 
+                                      re.DOTALL | re.UNICODE)

        # Api for Markdown to pass safe_mode into instance
        self.safe_mode = False
        if markdown_instance:
            self.markdown = markdown_instance

-    def getCompiledRegExp (self):
+    def getCompiledRegExp(self):
        """ Return a compiled regular expression. """
        return self.compiled_re

@ -154,17 +184,57 @@ class Pattern:
        """ Return class name, to define pattern type """
        return self.__class__.__name__

-BasePattern = Pattern # for backward compatibility
+    def unescape(self, text):
+        """ Return `text` with inline placeholders replaced by stashed text.
+
+        If the markdown instance has no 'inline' treeprocessor, `text` is
+        returned unchanged. A stashed string is substituted as-is; a stashed
+        element is substituted by its concatenated text content.
+        """
+        try:
+            stash = self.markdown.treeprocessors['inline'].stashed_nodes
+        except KeyError:
+            return text
+
+        def itertext(node):
+            ' Reimplement Element.itertext for older python versions '
+            tag = node.tag
+            # Skip nodes whose tag is neither a string nor None.
+            if tag is not None and not isinstance(tag, util.string_type):
+                return
+            if node.text:
+                yield node.text
+            for child in node:
+                for chunk in itertext(child):
+                    yield chunk
+                if child.tail:
+                    yield child.tail
+
+        def get_stash(match):
+            key = match.group(1)
+            if key not in stash:
+                # Unknown placeholder: same implicit None as before.
+                return None
+            stashed = stash.get(key)
+            if isinstance(stashed, util.string_type):
+                return stashed
+            # An etree Element - return text content only
+            return ''.join(itertext(stashed))
+
+        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)

-class SimpleTextPattern (Pattern):
+
+class SimpleTextPattern(Pattern):
    """ Return a simple text of group(2) of a Pattern. """
    def handleMatch(self, m):
        text = m.group(2)
-        if text == markdown.INLINE_PLACEHOLDER_PREFIX:
+        if text == util.INLINE_PLACEHOLDER_PREFIX:
            return None
        return text

-class SimpleTagPattern (Pattern):
+
+class EscapePattern(Pattern):
+    """ Handle a backslash-escaped character matched by the pattern. """
+
+    def handleMatch(self, m):
+        escaped = m.group(2)
+        if escaped not in self.markdown.ESCAPED_CHARS:
+            # Not one of the escapable characters: keep the backslash.
+            return '\\%s' % escaped
+        # Escapable: wrap the character's ordinal between STX and ETX.
+        return '%s%s%s' % (util.STX, ord(escaped), util.ETX)
+
+
+class SimpleTagPattern(Pattern):
    """
    Return element of type `tag` with a text attribute of group(3)
    of a Pattern.
@ -175,30 +245,30 @@ class SimpleTagPattern (Pattern):
        self.tag = tag

    def handleMatch(self, m):
-        el = markdown.etree.Element(self.tag)
+        el = util.etree.Element(self.tag)
        el.text = m.group(3)
        return el


-class SubstituteTagPattern (SimpleTagPattern):
-    """ Return a eLement of type `tag` with no children. """
+class SubstituteTagPattern(SimpleTagPattern):
+    """ Return an element of type `tag` with no children. """
    def handleMatch (self, m):
-        return markdown.etree.Element(self.tag)
+        return util.etree.Element(self.tag)


-class BacktickPattern (Pattern):
+class BacktickPattern(Pattern):
    """ Return a `<code>` element containing the matching text. """
    def __init__ (self, pattern):
        Pattern.__init__(self, pattern)
        self.tag = "code"

    def handleMatch(self, m):
-        el = markdown.etree.Element(self.tag)
-        el.text = markdown.AtomicString(m.group(3).strip())
+        el = util.etree.Element(self.tag)
+        el.text = util.AtomicString(m.group(3).strip())
        return el


-class DoubleTagPattern (SimpleTagPattern):
+class DoubleTagPattern(SimpleTagPattern):
    """Return a ElementTree element nested in tag2 nested in tag1.

    Useful for strong emphasis etc.
@ -206,37 +276,54 @@ class DoubleTagPattern (SimpleTagPattern):
    """
    def handleMatch(self, m):
        tag1, tag2 = self.tag.split(",")
-        el1 = markdown.etree.Element(tag1)
-        el2 = markdown.etree.SubElement(el1, tag2)
+        el1 = util.etree.Element(tag1)
+        el2 = util.etree.SubElement(el1, tag2)
        el2.text = m.group(3)
        return el1


-class HtmlPattern (Pattern):
+class HtmlPattern(Pattern):
    """ Store raw inline html and return a placeholder. """
    def handleMatch (self, m):
-        rawhtml = m.group(2)
+        rawhtml = self.unescape(m.group(2))
        place_holder = self.markdown.htmlStash.store(rawhtml)
        return place_holder

+    def unescape(self, text):
+        """ Return `text` with inline placeholders replaced by serialized content.
+
+        If the markdown instance has no 'inline' treeprocessor, `text` is
+        returned unchanged. Each placeholder found in the stash is replaced
+        by the output of `self.markdown.serializer` for the stashed value;
+        if serializing fails, a backslash followed by the value is used.
+        """
+        try:
+            stash = self.markdown.treeprocessors['inline'].stashed_nodes
+        except KeyError:
+            return text
+        def get_stash(m):
+            key = m.group(1)
+            value = stash.get(key)
+            if value is not None:
+                try:
+                    return self.markdown.serializer(value)
+                except Exception:
+                    # Serialization failed; fall back to the backslash form.
+                    # KeyboardInterrupt/SystemExit are no longer swallowed.
+                    return '\\%s' % value
            
-class LinkPattern (Pattern):
+        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)
+
+
+class LinkPattern(Pattern):
    """ Return a link element from the given match. """
    def handleMatch(self, m):
-        el = markdown.etree.Element("a")
+        el = util.etree.Element("a")
        el.text = m.group(2)
-        title = m.group(11)
+        title = m.group(13)
        href = m.group(9)

        if href:
            if href[0] == "<":
                href = href[1:-1]
-            el.set("href", self.sanitize_url(href.strip()))
+            el.set("href", self.sanitize_url(self.unescape(href.strip())))
        else:
            el.set("href", "")

        if title:
-            title = dequote(title) #.replace('"', "&quot;")
+            title = dequote(self.unescape(title)) 
            el.set("title", title)
        return el

@ -257,54 +344,75 @@ class LinkPattern (Pattern):
        `username:password@host:port`.

        """
+        url = url.replace(' ', '%20')
+        if not self.markdown.safeMode:
+            # Return immediately bypassing parsing.
+            return url
+        
+        try:
+            scheme, netloc, path, params, query, fragment = url = urlparse(url)
+        except ValueError:
+            # Bad url - so bad it couldn't be parsed.
+            return ''
+        
        locless_schemes = ['', 'mailto', 'news']
-        scheme, netloc, path, params, query, fragment = url = urlparse(url)
-        safe_url = False
-        if netloc != '' or scheme in locless_schemes:
-            safe_url = True
+        allowed_schemes = locless_schemes + ['http', 'https', 'ftp', 'ftps']
+        if scheme not in allowed_schemes:
+            # Not a known (allowed) scheme. Not safe.
+            return ''
+            
+        if netloc == '' and scheme not in locless_schemes:
+            # This should not happen. Treat as suspect.
+            return ''

        for part in url[2:]:
            if ":" in part:
-                safe_url = False
+                # A colon in "path", "parameters", "query" or "fragment" is suspect.
+                return ''

-        if self.markdown.safeMode and not safe_url:
-            return ''
-        else:
-            return urlunparse(url)
+        # Url passes all tests. Return url as-is.
+        return urlunparse(url)

 class ImagePattern(LinkPattern):
    """ Return a img element from the given match. """
    def handleMatch(self, m):
-        el = markdown.etree.Element("img")
+        el = util.etree.Element("img")
        src_parts = m.group(9).split()
        if src_parts:
            src = src_parts[0]
            if src[0] == "<" and src[-1] == ">":
                src = src[1:-1]
-            el.set('src', self.sanitize_url(src))
+            el.set('src', self.sanitize_url(self.unescape(src)))
        else:
            el.set('src', "")
        if len(src_parts) > 1:
-            el.set('title', dequote(" ".join(src_parts[1:])))
+            el.set('title', dequote(self.unescape(" ".join(src_parts[1:]))))

-        if markdown.ENABLE_ATTRIBUTES:
+        if self.markdown.enable_attributes:
            truealt = handleAttributes(m.group(2), el)
        else:
            truealt = m.group(2)

-        el.set('alt', truealt)
+        el.set('alt', self.unescape(truealt))
        return el

 class ReferencePattern(LinkPattern):
    """ Match to a stored reference and return link element. """
+
+    NEWLINE_CLEANUP_RE = re.compile(r'[ ]?\n', re.MULTILINE)
+
    def handleMatch(self, m):
-        if m.group(9):
+        try:
            id = m.group(9).lower()
-        else:
-            # if we got something like "[Google][]"
+        except IndexError:
+            id = None
+        if not id:
+            # if we got something like "[Google][]" or "[Google]"
            # we'll use "google" as the id
            id = m.group(2).lower()

+        # Clean up linebreaks in id
+        id = self.NEWLINE_CLEANUP_RE.sub(' ', id)
        if not id in self.markdown.references: # ignore undefined refs
            return None
        href, title = self.markdown.references[id]
@ -313,7 +421,7 @@ class ReferencePattern(LinkPattern):
        return self.makeTag(href, title, text)

    def makeTag(self, href, title, text):
-        el = markdown.etree.Element('a')
+        el = util.etree.Element('a')

        el.set('href', self.sanitize_url(href))
        if title:
@ -323,48 +431,52 @@ class ReferencePattern(LinkPattern):
        return el


-class ImageReferencePattern (ReferencePattern):
+class ImageReferencePattern(ReferencePattern):
    """ Match to a stored reference and return img element. """
    def makeTag(self, href, title, text):
-        el = markdown.etree.Element("img")
+        el = util.etree.Element("img")
        el.set("src", self.sanitize_url(href))
        if title:
            el.set("title", title)
-        el.set("alt", text)
+
+        if self.markdown.enable_attributes:
+            text = handleAttributes(text, el)
+
+        el.set("alt", self.unescape(text))
        return el


-class AutolinkPattern (Pattern):
+class AutolinkPattern(Pattern):
    """ Return a link Element given an autolink (`<http://example/com>`). """
    def handleMatch(self, m):
-        el = markdown.etree.Element("a")
-        el.set('href', m.group(2))
-        el.text = markdown.AtomicString(m.group(2))
+        el = util.etree.Element("a")
+        el.set('href', self.unescape(m.group(2)))
+        el.text = util.AtomicString(m.group(2))
        return el

-class AutomailPattern (Pattern):
+class AutomailPattern(Pattern):
    """
    Return a mailto link Element given an automail link (`<foo@example.com>`).
    """
    def handleMatch(self, m):
-        el = markdown.etree.Element('a')
-        email = m.group(2)
+        el = util.etree.Element('a')
+        email = self.unescape(m.group(2))
        if email.startswith("mailto:"):
            email = email[len("mailto:"):]

        def codepoint2name(code):
            """Return entity definition by code, or the code if not defined."""
-            entity = htmlentitydefs.codepoint2name.get(code)
+            entity = entities.codepoint2name.get(code)
            if entity:
-                return "%s%s;" % (markdown.AMP_SUBSTITUTE, entity)
+                return "%s%s;" % (util.AMP_SUBSTITUTE, entity)
            else:
-                return "%s#%d;" % (markdown.AMP_SUBSTITUTE, code)
+                return "%s#%d;" % (util.AMP_SUBSTITUTE, code)

        letters = [codepoint2name(ord(letter)) for letter in email]
-        el.text = markdown.AtomicString(''.join(letters))
+        el.text = util.AtomicString(''.join(letters))

        mailto = "mailto:" + email
-        mailto = "".join([markdown.AMP_SUBSTITUTE + '#%d;' %
+        mailto = "".join([util.AMP_SUBSTITUTE + '#%d;' %
                          ord(letter) for letter in mailto])
        el.set('href', mailto)
        return el
--- a/src/calibre/ebooks/markdown/markdown.py
+++ b/src/calibre/ebooks/markdown/markdown.py
@ -1,612 +0,0 @@
-"""
-Python Markdown
-===============
-
-Python Markdown converts Markdown to HTML and can be used as a library or
-called from the command line.
-
-## Basic usage as a module:
-
-    import markdown
-    md = Markdown()
-    html = md.convert(your_text_string)
-
-## Basic use from the command line:
-
-    python markdown.py source.txt > destination.html
-
-Run "python markdown.py --help" to see more options.
-
-## Extensions
-
-See <http://www.freewisdom.org/projects/python-markdown/> for more
-information and instructions on how to extend the functionality of
-Python Markdown.  Read that before you try modifying this file.
-
-## Authors and License
-
-Started by [Manfred Stienstra](http://www.dwerg.net/).  Continued and
-maintained  by [Yuri Takhteyev](http://www.freewisdom.org), [Waylan
-Limberg](http://achinghead.com/) and [Artem Yunusov](http://blog.splyer.com).
-
-Contact: markdown@freewisdom.org
-
-Copyright 2007, 2008 The Python Markdown Project (v. 1.7 and later)
-Copyright 200? Django Software Foundation (OrderedDict implementation)
-Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
-Copyright 2004 Manfred Stienstra (the original version)
-
-License: BSD (see docs/LICENSE for details).
-"""
-from calibre.ebooks.markdown.commandline import parse_options
-
-version = "2.0"
-version_info = (2,0,0, "Final")
-
-import re
-import codecs
-import sys
-import warnings
-import logging
-from logging import DEBUG, INFO, WARN, ERROR, CRITICAL
-
-
-"""
-CONSTANTS
-=============================================================================
-"""
-
-"""
-Constants you might want to modify
-----------------------------------------------------------------------------
-"""
-
-# default logging level for command-line use
-COMMAND_LINE_LOGGING_LEVEL = CRITICAL
-TAB_LENGTH = 4               # expand tabs to this many spaces
-ENABLE_ATTRIBUTES = True     # @id = xyz -> <... id="xyz">
-#SMART_EMPHASIS = True        # this_or_that does not become this<i>or</i>that
-SMART_EMPHASIS = False      # this_or_that needs to have _ escaped as \_.
-DEFAULT_OUTPUT_FORMAT = 'xhtml1'     # xhtml or html4 output
-HTML_REMOVED_TEXT = "[HTML_REMOVED]" # text used instead of HTML in safe mode
-BLOCK_LEVEL_ELEMENTS = re.compile("p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
-                                  "|script|noscript|form|fieldset|iframe|math"
-                                  "|ins|del|hr|hr/|style|li|dt|dd|thead|tbody"
-                                  "|tr|th|td")
-DOC_TAG = "div"     # Element used to wrap document - later removed
-
-# Placeholders
-STX = u'\u0002'  # Use STX ("Start of text") for start-of-placeholder
-ETX = u'\u0003'  # Use ETX ("End of text") for end-of-placeholder
-INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
-INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
-AMP_SUBSTITUTE = STX+"amp"+ETX
-
-
-"""
-Constants you probably do not need to change
-----------------------------------------------------------------------------
-"""
-
-RTL_BIDI_RANGES = ( (u'\u0590', u'\u07FF'),
-                     # Hebrew (0590-05FF), Arabic (0600-06FF),
-                     # Syriac (0700-074F), Arabic supplement (0750-077F),
-                     # Thaana (0780-07BF), Nko (07C0-07FF).
-                    (u'\u2D30', u'\u2D7F'), # Tifinagh
-                    )
-
-
-"""
-AUXILIARY GLOBAL FUNCTIONS
-=============================================================================
-"""
-
-
-def message(level, text):
-    """ A wrapper method for logging debug messages. """
-    logger =  logging.getLogger('MARKDOWN')
-    if logger.handlers:
-        # The logger is configured
-        logger.log(level, text)
-        if level > WARN:
-            sys.exit(0)
-    elif level > WARN:
-        raise MarkdownException, text
-    else:
-        warnings.warn(text, MarkdownWarning)
-
-
-def isBlockLevel(tag):
-    """Check if the tag is a block level HTML tag."""
-    return BLOCK_LEVEL_ELEMENTS.match(tag)
-
-"""
-MISC AUXILIARY CLASSES
-=============================================================================
-"""
-
-class AtomicString(unicode):
-    """A string which should not be further processed."""
-    pass
-
-
-class MarkdownException(Exception):
-    """ A Markdown Exception. """
-    pass
-
-
-class MarkdownWarning(Warning):
-    """ A Markdown Warning. """
-    pass
-
-
-"""
-OVERALL DESIGN
-=============================================================================
-
-Markdown processing takes place in four steps:
-
-1. A bunch of "preprocessors" munge the input text.
-2. BlockParser() parses the high-level structural elements of the
-   pre-processed text into an ElementTree.
-3. A bunch of "treeprocessors" are run against the ElementTree. One such
-   treeprocessor runs InlinePatterns against the ElementTree, detecting inline
-   markup.
-4. Some post-processors are run against the text after the ElementTree has
-   been serialized into text.
-5. The output is written to a string.
-
-Those steps are put together by the Markdown() class.
-
-"""
-
-import preprocessors
-import blockprocessors
-import treeprocessors
-import inlinepatterns
-import postprocessors
-import blockparser
-import etree_loader
-import odict
-
-# Extensions should use "markdown.etree" instead of "etree" (or do `from
-# markdown import etree`).  Do not import it by yourself.
-
-etree = etree_loader.importETree()
-
-# Adds the ability to output html4
-import html4
-
-
-class Markdown:
-    """Convert Markdown to HTML."""
-
-    def __init__(self,
-                 extensions=[],
-                 extension_configs={},
-                 safe_mode = False, 
-                 output_format=DEFAULT_OUTPUT_FORMAT):
-        """
-        Creates a new Markdown instance.
-
-        Keyword arguments:
-
-        * extensions: A list of extensions.
-           If they are of type string, the module mdx_name.py will be loaded.
-           If they are a subclass of markdown.Extension, they will be used
-           as-is.
-        * extension-configs: Configuration setting for extensions.
-        * safe_mode: Disallow raw html. One of "remove", "replace" or "escape".
-        * output_format: Format of output. Supported formats are:
-            * "xhtml1": Outputs XHTML 1.x. Default.
-            * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
-            * "html4": Outputs HTML 4
-            * "html": Outputs latest supported version of HTML (currently HTML 4).
-            Note that it is suggested that the more specific formats ("xhtml1" 
-            and "html4") be used as "xhtml" or "html" may change in the future
-            if it makes sense at that time. 
-
-        """
-        
-        self.safeMode = safe_mode
-        self.registeredExtensions = []
-        self.docType = ""
-        self.stripTopLevelTags = True
-
-        # Preprocessors
-        self.preprocessors = odict.OrderedDict()
-        self.preprocessors["html_block"] = \
-                preprocessors.HtmlBlockPreprocessor(self)
-        self.preprocessors["reference"] = \
-                preprocessors.ReferencePreprocessor(self)
-        # footnote preprocessor will be inserted with "<reference"
-
-        # Block processors - ran by the parser
-        self.parser = blockparser.BlockParser()
-        self.parser.blockprocessors['empty'] = \
-                blockprocessors.EmptyBlockProcessor(self.parser)
-        self.parser.blockprocessors['indent'] = \
-                blockprocessors.ListIndentProcessor(self.parser)
-        self.parser.blockprocessors['code'] = \
-                blockprocessors.CodeBlockProcessor(self.parser)
-        self.parser.blockprocessors['hashheader'] = \
-                blockprocessors.HashHeaderProcessor(self.parser)
-        self.parser.blockprocessors['setextheader'] = \
-                blockprocessors.SetextHeaderProcessor(self.parser)
-        self.parser.blockprocessors['hr'] = \
-                blockprocessors.HRProcessor(self.parser)
-        self.parser.blockprocessors['olist'] = \
-                blockprocessors.OListProcessor(self.parser)
-        self.parser.blockprocessors['ulist'] = \
-                blockprocessors.UListProcessor(self.parser)
-        self.parser.blockprocessors['quote'] = \
-                blockprocessors.BlockQuoteProcessor(self.parser)
-        self.parser.blockprocessors['paragraph'] = \
-                blockprocessors.ParagraphProcessor(self.parser)
-
-
-        #self.prePatterns = []
-
-        # Inline patterns - Run on the tree
-        self.inlinePatterns = odict.OrderedDict()
-        self.inlinePatterns["backtick"] = \
-                inlinepatterns.BacktickPattern(inlinepatterns.BACKTICK_RE)
-        self.inlinePatterns["escape"] = \
-                inlinepatterns.SimpleTextPattern(inlinepatterns.ESCAPE_RE)
-        self.inlinePatterns["reference"] = \
-            inlinepatterns.ReferencePattern(inlinepatterns.REFERENCE_RE, self)
-        self.inlinePatterns["link"] = \
-                inlinepatterns.LinkPattern(inlinepatterns.LINK_RE, self)
-        self.inlinePatterns["image_link"] = \
-                inlinepatterns.ImagePattern(inlinepatterns.IMAGE_LINK_RE, self)
-        self.inlinePatterns["image_reference"] = \
-            inlinepatterns.ImageReferencePattern(inlinepatterns.IMAGE_REFERENCE_RE, self)
-        self.inlinePatterns["autolink"] = \
-            inlinepatterns.AutolinkPattern(inlinepatterns.AUTOLINK_RE, self)
-        self.inlinePatterns["automail"] = \
-            inlinepatterns.AutomailPattern(inlinepatterns.AUTOMAIL_RE, self)
-        self.inlinePatterns["linebreak2"] = \
-            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_2_RE, 'br')
-        self.inlinePatterns["linebreak"] = \
-            inlinepatterns.SubstituteTagPattern(inlinepatterns.LINE_BREAK_RE, 'br')
-        self.inlinePatterns["html"] = \
-                inlinepatterns.HtmlPattern(inlinepatterns.HTML_RE, self)
-        self.inlinePatterns["entity"] = \
-                inlinepatterns.HtmlPattern(inlinepatterns.ENTITY_RE, self)
-        self.inlinePatterns["not_strong"] = \
-                inlinepatterns.SimpleTextPattern(inlinepatterns.NOT_STRONG_RE)
-        self.inlinePatterns["strong_em"] = \
-            inlinepatterns.DoubleTagPattern(inlinepatterns.STRONG_EM_RE, 'strong,em')
-        self.inlinePatterns["strong"] = \
-            inlinepatterns.SimpleTagPattern(inlinepatterns.STRONG_RE, 'strong')
-        self.inlinePatterns["emphasis"] = \
-            inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_RE, 'em')
-        self.inlinePatterns["emphasis2"] = \
-            inlinepatterns.SimpleTagPattern(inlinepatterns.EMPHASIS_2_RE, 'em')
-        # The order of the handlers matters!!!
-
-
-        # Tree processors - run once we have a basic parse.
-        self.treeprocessors = odict.OrderedDict()
-        self.treeprocessors["inline"] = treeprocessors.InlineProcessor(self)
-        self.treeprocessors["prettify"] = \
-                treeprocessors.PrettifyTreeprocessor(self)
-
-        # Postprocessors - finishing touches.
-        self.postprocessors = odict.OrderedDict()
-        self.postprocessors["raw_html"] = \
-                postprocessors.RawHtmlPostprocessor(self)
-        self.postprocessors["amp_substitute"] = \
-                postprocessors.AndSubstitutePostprocessor()
-        # footnote postprocessor will be inserted with ">amp_substitute"
-
-        # Map format keys to serializers
-        self.output_formats = {
-            'html'  : html4.to_html_string, 
-            'html4' : html4.to_html_string,
-            'xhtml' : etree.tostring, 
-            'xhtml1': etree.tostring,
-        }
-
-        self.references = {}
-        self.htmlStash = preprocessors.HtmlStash()
-        self.registerExtensions(extensions = extensions,
-                                configs = extension_configs)
-        self.set_output_format(output_format)
-        self.reset()
-
-    def registerExtensions(self, extensions, configs):
-        """
-        Register extensions with this instance of Markdown.
-
-        Keyword aurguments:
-
-        * extensions: A list of extensions, which can either
-           be strings or objects.  See the docstring on Markdown.
-        * configs: A dictionary mapping module names to config options.
-
-        """
-        for ext in extensions:
-            if isinstance(ext, basestring):
-                ext = load_extension(ext, configs.get(ext, []))
-            try:
-                ext.extendMarkdown(self, globals())
-            except AttributeError:
-                message(ERROR, "Incorrect type! Extension '%s' is "
-                               "neither a string or an Extension." %(repr(ext)))
-            
-
-    def registerExtension(self, extension):
-        """ This gets called by the extension """
-        self.registeredExtensions.append(extension)
-
-    def reset(self):
-        """
-        Resets all state variables so that we can start with a new text.
-        """
-        self.htmlStash.reset()
-        self.references.clear()
-
-        for extension in self.registeredExtensions:
-            extension.reset()
-
-    def set_output_format(self, format):
-        """ Set the output format for the class instance. """
-        try:
-            self.serializer = self.output_formats[format.lower()]
-        except KeyError:
-            message(CRITICAL, 'Invalid Output Format: "%s". Use one of %s.' \
-                               % (format, self.output_formats.keys()))
-
-    def convert(self, source):
-        """
-        Convert markdown to serialized XHTML or HTML.
-
-        Keyword arguments:
-
-        * source: Source text as a Unicode string.
-
-        """
-
-        # Fixup the source text
-        if not source.strip():
-            return u""  # a blank unicode string
-        try:
-            source = unicode(source)
-        except UnicodeDecodeError:
-            message(CRITICAL, 'UnicodeDecodeError: Markdown only accepts unicode or ascii input.')
-            return u""
-
-        source = source.replace(STX, "").replace(ETX, "")
-        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
-        source = re.sub(r'\n\s+\n', '\n\n', source)
-        source = source.expandtabs(TAB_LENGTH)
-
-        # Split into lines and run the line preprocessors.
-        self.lines = source.split("\n")
-        for prep in self.preprocessors.values():
-            self.lines = prep.run(self.lines)
-
-        # Parse the high-level elements.
-        root = self.parser.parseDocument(self.lines).getroot()
-
-        # Run the tree-processors
-        for treeprocessor in self.treeprocessors.values():
-            newRoot = treeprocessor.run(root)
-            if newRoot:
-                root = newRoot
-
-        # Serialize _properly_.  Strip top-level tags.
-        output, length = codecs.utf_8_decode(self.serializer(root, encoding="utf8"))
-        if self.stripTopLevelTags:
-            start = output.index('<%s>'%DOC_TAG)+len(DOC_TAG)+2
-            end = output.rindex('</%s>'%DOC_TAG)
-            output = output[start:end].strip()
-
-        # Run the text post-processors
-        for pp in self.postprocessors.values():
-            output = pp.run(output)
-
-        return output.strip()
-
-    def convertFile(self, input=None, output=None, encoding=None):
-        """Converts a markdown file and returns the HTML as a unicode string.
-
-        Decodes the file using the provided encoding (defaults to utf-8),
-        passes the file content to markdown, and outputs the html to either
-        the provided stream or the file with provided name, using the same
-        encoding as the source file.
-
-        **Note:** This is the only place that decoding and encoding of unicode
-        takes place in Python-Markdown.  (All other code is unicode-in /
-        unicode-out.)
-
-        Keyword arguments:
-
-        * input: Name of source text file.
-        * output: Name of output file. Writes to stdout if `None`.
-        * encoding: Encoding of input and output files. Defaults to utf-8.
-
-        """
-
-        encoding = encoding or "utf-8"
-
-        # Read the source
-        input_file = codecs.open(input, mode="r", encoding=encoding)
-        text = input_file.read()
-        input_file.close()
-        text = text.lstrip(u'\ufeff') # remove the byte-order mark
-
-        # Convert
-        html = self.convert(text)
-
-        # Write to file or stdout
-        if isinstance(output, (str, unicode)):
-            output_file = codecs.open(output, "w", encoding=encoding)
-            output_file.write(html)
-            output_file.close()
-        else:
-            output.write(html.encode(encoding))
-
-
-"""
-Extensions
-----------------------------------------------------------------------------
-"""
-
-class Extension:
-    """ Base class for extensions to subclass. """
-    def __init__(self, configs = {}):
-        """Create an instance of an Extention.
-
-        Keyword arguments:
-
-        * configs: A dict of configuration setting used by an Extension.
-        """
-        self.config = configs
-
-    def getConfig(self, key):
-        """ Return a setting for the given key or an empty string. """
-        if key in self.config:
-            return self.config[key][0]
-        else:
-            return ""
-
-    def getConfigInfo(self):
-        """ Return all config settings as a list of tuples. """
-        return [(key, self.config[key][1]) for key in self.config.keys()]
-
-    def setConfig(self, key, value):
-        """ Set a config setting for `key` with the given `value`. """
-        self.config[key][0] = value
-
-    def extendMarkdown(self, md, md_globals):
-        """
-        Add the various proccesors and patterns to the Markdown Instance.
-
-        This method must be overriden by every extension.
-
-        Keyword arguments:
-
-        * md: The Markdown instance.
-
-        * md_globals: Global variables in the markdown module namespace.
-
-        """
-        pass
-
-
-def load_extension(ext_name, configs = []):
-    """Load extension by name, then return the module.
-
-    The extension name may contain arguments as part of the string in the
-    following format: "extname(key1=value1,key2=value2)"
-
-    """
-
-    # Parse extensions config params (ignore the order)
-    configs = dict(configs)
-    pos = ext_name.find("(") # find the first "("
-    if pos > 0:
-        ext_args = ext_name[pos+1:-1]
-        ext_name = ext_name[:pos]
-        pairs = [x.split("=") for x in ext_args.split(",")]
-        configs.update([(x.strip(), y.strip()) for (x, y) in pairs])
-
-    # Setup the module names
-    ext_module = 'calibre.ebooks.markdown.extensions'
-    module_name_new_style = '.'.join([ext_module, ext_name])
-    module_name_old_style = '_'.join(['mdx', ext_name])
-
-    # Try loading the extention first from one place, then another
-    try: # New style (markdown.extensons.<extension>)
-        module = __import__(module_name_new_style, {}, {}, [ext_module])
-    except ImportError:
-        try: # Old style (mdx.<extension>)
-            module = __import__(module_name_old_style)
-        except ImportError:
-            message(WARN, "Failed loading extension '%s' from '%s' or '%s'"
-                % (ext_name, module_name_new_style, module_name_old_style))
-            # Return None so we don't try to initiate none-existant extension
-            return None
-
-    # If the module is loaded successfully, we expect it to define a
-    # function called makeExtension()
-    try:
-        return module.makeExtension(configs.items())
-    except AttributeError:
-        message(CRITICAL, "Failed to initiate extension '%s'" % ext_name)
-
-
-def load_extensions(ext_names):
-    """Loads multiple extensions"""
-    extensions = []
-    for ext_name in ext_names:
-        extension = load_extension(ext_name)
-        if extension:
-            extensions.append(extension)
-    return extensions
-
-
-"""
-EXPORTED FUNCTIONS
-=============================================================================
-
-Those are the two functions we really mean to export: markdown() and
-markdownFromFile().
-"""
-
-def markdown(text,
-             extensions = [],
-             safe_mode = False,
-             output_format = DEFAULT_OUTPUT_FORMAT):
-    """Convert a markdown string to HTML and return HTML as a unicode string.
-
-    This is a shortcut function for `Markdown` class to cover the most
-    basic use case.  It initializes an instance of Markdown, loads the
-    necessary extensions and runs the parser on the given text.
-
-    Keyword arguments:
-
-    * text: Markdown formatted text as Unicode or ASCII string.
-    * extensions: A list of extensions or extension names (may contain config args).
-    * safe_mode: Disallow raw html.  One of "remove", "replace" or "escape".
-    * output_format: Format of output. Supported formats are:
-        * "xhtml1": Outputs XHTML 1.x. Default.
-        * "xhtml": Outputs latest supported version of XHTML (currently XHTML 1.1).
-        * "html4": Outputs HTML 4
-        * "html": Outputs latest supported version of HTML (currently HTML 4).
-        Note that it is suggested that the more specific formats ("xhtml1" 
-        and "html4") be used as "xhtml" or "html" may change in the future
-        if it makes sense at that time. 
-
-    Returns: An HTML document as a string.
-
-    """
-    md = Markdown(extensions=load_extensions(extensions),
-                  safe_mode=safe_mode, 
-                  output_format=output_format)
-    return md.convert(text)
-
-
-def markdownFromFile(input = None,
-                     output = None,
-                     extensions = [],
-                     encoding = None,
-                     safe_mode = False,
-                     output_format = DEFAULT_OUTPUT_FORMAT):
-    """Read markdown code from a file and write it to a file or a stream."""
-    md = Markdown(extensions=load_extensions(extensions), 
-                  safe_mode=safe_mode,
-                  output_format=output_format)
-    md.convertFile(input, output, encoding)
-
-
-def main():
-    from commandline import run
-    run()
-
-
-if __name__ == '__main__':
-    sys.exit(main())
-    ''' Run Markdown from the command line. '''
--- a/src/calibre/ebooks/markdown/odict.py
+++ b/src/calibre/ebooks/markdown/odict.py
@ -1,3 +1,14 @@
+from __future__ import unicode_literals
+from __future__ import absolute_import
+from . import util
+
+from copy import deepcopy
+
+def iteritems_compat(d):
+    """Return an iterator over the (key, value) pairs of a dictionary.
+
+    Calls ``d.iteritems()`` when the mapping provides it and falls back to
+    ``d.items()`` otherwise.  Modelled on the helper of the same purpose in
+    the `six` module.
+    """
+    # Look the method up by name on the object itself rather than through a
+    # module-level constant, so the helper has no hidden global dependency.
+    pairs = getattr(d, 'iteritems', None)
+    if pairs is None:
+        pairs = d.items
+    return iter(pairs())
+
 class OrderedDict(dict):
    """
    A dictionary that keeps its keys in the order in which they're inserted.
@ -11,34 +22,44 @@ class OrderedDict(dict):
        return instance

    def __init__(self, data=None):
-        if data is None:
-            data = {}
-        super(OrderedDict, self).__init__(data)
-        if isinstance(data, dict):
-            self.keyOrder = data.keys()
+        if data is None or isinstance(data, dict):
+            data = data or []
+            super(OrderedDict, self).__init__(data)
+            self.keyOrder = list(data) if data else []
        else:
-            self.keyOrder = []
+            super(OrderedDict, self).__init__()
+            super_set = super(OrderedDict, self).__setitem__
            for key, value in data:
-                if key not in self.keyOrder:
+                # Take the ordering from first key
+                if key not in self:
                    self.keyOrder.append(key)
+                # But override with last value in data (dict() does this)
+                super_set(key, value)

    def __deepcopy__(self, memo):
-        from copy import deepcopy
        return self.__class__([(key, deepcopy(value, memo))
-                               for key, value in self.iteritems()])
+                               for key, value in self.items()])
+
+    def __copy__(self):
+        """ Support `copy.copy()` by delegating to this class's `copy()`. """
+        # Python's default copy implementation would alter the state of
+        # self. The reason seems complex but is likely related to
+        # subclassing dict.
+        duplicate = self.copy()
+        return duplicate

    def __setitem__(self, key, value):
-        super(OrderedDict, self).__setitem__(key, value)
-        if key not in self.keyOrder:
+        if key not in self:
            self.keyOrder.append(key)
+        super(OrderedDict, self).__setitem__(key, value)

    def __delitem__(self, key):
        super(OrderedDict, self).__delitem__(key)
        self.keyOrder.remove(key)

    def __iter__(self):
-        for k in self.keyOrder:
-            yield k
+        return iter(self.keyOrder)
+
+    def __reversed__(self):
+        """ Return a reverse iterator over the keys held in `keyOrder`. """
+        ordered_keys = self.keyOrder
+        return reversed(ordered_keys)

    def pop(self, k, *args):
        result = super(OrderedDict, self).pop(k, *args)
@ -54,41 +75,51 @@ class OrderedDict(dict):
        self.keyOrder.remove(result[0])
        return result

-    def items(self):
-        return zip(self.keyOrder, self.values())
-
-    def iteritems(self):
+    def _iteritems(self):
        for key in self.keyOrder:
-            yield key, super(OrderedDict, self).__getitem__(key)
+            yield key, self[key]

-    def keys(self):
-        return self.keyOrder[:]
-
-    def iterkeys(self):
-        return iter(self.keyOrder)
-
-    def values(self):
-        return [super(OrderedDict, self).__getitem__(k) for k in self.keyOrder]
-
-    def itervalues(self):
+    def _iterkeys(self):
        for key in self.keyOrder:
-            yield super(OrderedDict, self).__getitem__(key)
+            yield key
+
+    def _itervalues(self):
+        for key in self.keyOrder:
+            yield self[key]
+
+    # Bind the public accessors depending on `util.PY3` (presumably true
+    # when running under Python 3 -- confirm in util).
+    if not util.PY3:
+        # The iter* names expose the generators; the plain names return
+        # freshly built lists.
+        iteritems = _iteritems
+        iterkeys = _iterkeys
+        itervalues = _itervalues
+
+        def items(self):
+            return [(name, self[name]) for name in self.keyOrder]
+
+        def keys(self):
+            return list(self.keyOrder)
+
+        def values(self):
+            return [self[name] for name in self.keyOrder]
+    else:
+        # The plain names are the generators themselves.
+        items = _iteritems
+        keys = _iterkeys
+        values = _itervalues

    def update(self, dict_):
-        for k, v in dict_.items():
-            self.__setitem__(k, v)
+        for k, v in iteritems_compat(dict_):
+            self[k] = v

    def setdefault(self, key, default):
-        if key not in self.keyOrder:
+        if key not in self:
            self.keyOrder.append(key)
        return super(OrderedDict, self).setdefault(key, default)

    def value_for_index(self, index):
-        """Return the value of the item at the given zero-based index."""
+        """Returns the value of the item at the given zero-based index."""
        return self[self.keyOrder[index]]

    def insert(self, index, key, value):
-        """Insert the key, value pair before the item with the given index."""
+        """Inserts the key, value pair before the item with the given index."""
        if key in self.keyOrder:
            n = self.keyOrder.index(key)
            del self.keyOrder[n]
@ -98,18 +129,16 @@ class OrderedDict(dict):
        super(OrderedDict, self).__setitem__(key, value)

    def copy(self):
-        """Return a copy of this object."""
+        """Returns a copy of this object."""
        # This way of initializing the copy means it works for subclasses, too.
-        obj = self.__class__(self)
-        obj.keyOrder = self.keyOrder[:]
-        return obj
+        return self.__class__(self)

    def __repr__(self):
        """
-        Replace the normal dict.__repr__ with a version that returns the keys
-        in their sorted order.
+        Replaces the normal dict.__repr__ with a version that returns the keys
+        in their Ordered order.
        """
-        return '{%s}' % ', '.join(['%r: %r' % (k, v) for k, v in self.items()])
+        return '{%s}' % ', '.join(['%r: %r' % (k, v) for k, v in iteritems_compat(self)])

    def clear(self):
        super(OrderedDict, self).clear()
@ -117,7 +146,10 @@ class OrderedDict(dict):

    def index(self, key):
        """ Return the index of a given key. """
-        return self.keyOrder.index(key)
+        try:
+            return self.keyOrder.index(key)
+        except ValueError:
+            raise ValueError("Element '%s' was not found in OrderedDict" % key)

    def index_for_location(self, location):
        """ Return index or None for a given location. """
@ -150,8 +182,8 @@ class OrderedDict(dict):
        """ Change location of an existing item. """
        n = self.keyOrder.index(key)
        del self.keyOrder[n]
-        i = self.index_for_location(location)
        try:
+            i = self.index_for_location(location)
            if i is not None:
                self.keyOrder.insert(i, key)
            else:
--- a/src/calibre/ebooks/markdown/postprocessors.py
+++ b/src/calibre/ebooks/markdown/postprocessors.py
@ -8,15 +8,23 @@ processing.

 """

+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import util
+from . import odict
+import re

-import markdown

-class Processor:
-    def __init__(self, markdown_instance=None):
-        if markdown_instance:
-            self.markdown = markdown_instance
+def build_postprocessors(md_instance, **kwargs):
+    """ Build the default postprocessors for Markdown.
+
+    Returns an `odict.OrderedDict` holding, in this order, the "raw_html",
+    "amp_substitute" and "unescape" postprocessors.  Only the first one is
+    given `md_instance`; extra keyword arguments are accepted and ignored.
+    """
+    stages = (
+        ("raw_html", RawHtmlPostprocessor(md_instance)),
+        ("amp_substitute", AndSubstitutePostprocessor()),
+        ("unescape", UnescapePostprocessor()),
+    )
+    registry = odict.OrderedDict()
+    for name, processor in stages:
+        registry[name] = processor
+    return registry

-class Postprocessor(Processor):
+
+class Postprocessor(util.Processor):
    """
    Postprocessors are run after the ElementTree it converted back into text.

@ -50,12 +58,12 @@ class RawHtmlPostprocessor(Postprocessor):
                elif str(self.markdown.safeMode).lower() == 'remove':
                    html = ''
                else:
-                    html = markdown.HTML_REMOVED_TEXT
-            if safe or not self.markdown.safeMode:
+                    html = self.markdown.html_replacement_text
+            if self.isblocklevel(html) and (safe or not self.markdown.safeMode):
                text = text.replace("<p>%s</p>" % 
-                            (markdown.preprocessors.HTML_PLACEHOLDER % i),
+                            (self.markdown.htmlStash.get_placeholder(i)),
                            html + "\n")
-            text =  text.replace(markdown.preprocessors.HTML_PLACEHOLDER % i, 
+            text =  text.replace(self.markdown.htmlStash.get_placeholder(i), 
                                 html)
        return text

@ -66,12 +74,31 @@ class RawHtmlPostprocessor(Postprocessor):
        html = html.replace('>', '&gt;')
        return html.replace('"', '&quot;')

+    def isblocklevel(self, html):
+        """ Tell whether `html` opens with a tag treated as block level.
+
+        Returns False when `html` does not start with `<name` or `</name`,
+        True when the name starts with one of `!`, `?`, `@` or `%`, and
+        otherwise whatever `util.isBlockLevel()` returns for the name.
+        """
+        opening = re.match(r'^\<\/?([^ >]+)', html)
+        if not opening:
+            return False
+        tag = opening.group(1)
+        if tag[0] in ('!', '?', '@', '%'):
+            # Comment, php etc...
+            return True
+        return util.isBlockLevel(tag)
+

 class AndSubstitutePostprocessor(Postprocessor):
    """ Restore valid entities """
-    def __init__(self):
-        pass

    def run(self, text):
-        text =  text.replace(markdown.AMP_SUBSTITUTE, "&")
+        text =  text.replace(util.AMP_SUBSTITUTE, "&")
        return text
+
+
+class UnescapePostprocessor(Postprocessor):
+    """ Restore escaped chars.
+
+    Every run of digits wrapped between `util.STX` and `util.ETX` is
+    replaced by `util.int2str()` of that number.
+    """
+
+    # Raw string so that `\d` reaches the regex engine exactly as written
+    # instead of relying on Python passing an unrecognised string escape
+    # through (which newer interpreters warn about).
+    RE = re.compile(r'%s(\d+)%s' % (util.STX, util.ETX))
+
+    def unescape(self, m):
+        """ Return the replacement text for one placeholder match `m`. """
+        # NOTE(review): int2str presumably maps a code point to its
+        # character -- confirm in util.
+        return util.int2str(int(m.group(1)))
+
+    def run(self, text):
+        """ Return `text` with every placeholder substituted. """
+        return self.RE.sub(self.unescape, text)
--- a/src/calibre/ebooks/markdown/preprocessors.py
+++ b/src/calibre/ebooks/markdown/preprocessors.py
@ -1,4 +1,3 @@
-
 """
 PRE-PROCESSORS
 =============================================================================
@ -7,18 +6,24 @@ Preprocessors work on source text before we start doing anything too
 complicated. 
 """

+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import util
+from . import odict
 import re
-import markdown

-HTML_PLACEHOLDER_PREFIX = markdown.STX+"wzxhzdk:"
-HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%d" + markdown.ETX

-class Processor:
-    def __init__(self, markdown_instance=None):
-        if markdown_instance:
-            self.markdown = markdown_instance
+def build_preprocessors(md_instance, **kwargs):
+    """ Build the default set of preprocessors used by Markdown.
+
+    Returns an `odict.OrderedDict` mapping preprocessor names to
+    instances, inserted in the order 'normalize_whitespace',
+    'html_block' (conditionally) and 'reference'.  `kwargs` is accepted
+    but not used here.
+    """
+    preprocessors = odict.OrderedDict()
+    preprocessors['normalize_whitespace'] = NormalizeWhitespace(md_instance)
+    # The HTML block preprocessor is left out only when safeMode is
+    # exactly the string 'escape' (the comparison is case-sensitive).
+    if md_instance.safeMode != 'escape':
+        preprocessors["html_block"] = HtmlBlockPreprocessor(md_instance)
+    preprocessors["reference"] = ReferencePreprocessor(md_instance)
+    return preprocessors

-class Preprocessor (Processor):
+
+class Preprocessor(util.Processor):
    """
    Preprocessors are run after the text is broken into lines.

@ -38,66 +43,95 @@ class Preprocessor (Processor):
        """
        pass

-class HtmlStash:
-    """
-    This class is used for stashing HTML objects that we extract
-    in the beginning and replace with place-holders.
-    """

-    def __init__ (self):
-        """ Create a HtmlStash. """
-        self.html_counter = 0 # for counting inline html segments
-        self.rawHtmlBlocks=[]
+class NormalizeWhitespace(Preprocessor):
+    """ Normalize whitespace for consistent parsing. """

-    def store(self, html, safe=False):
-        """
-        Saves an HTML segment for later reinsertion.  Returns a
-        placeholder string that needs to be inserted into the
-        document.
-
-        Keyword arguments:
-
-        * html: an html segment
-        * safe: label an html segment as safe for safemode
-
-        Returns : a placeholder string
-
-        """
-        self.rawHtmlBlocks.append((html, safe))
-        placeholder = HTML_PLACEHOLDER % self.html_counter
-        self.html_counter += 1
-        return placeholder
-
-    def reset(self):
-        self.html_counter = 0
-        self.rawHtmlBlocks = []
+    def run(self, lines):
+        """ Take a list of lines and return a normalized list of lines. """
+        source = '\n'.join(lines)
+        # Drop any STX/ETX characters already present in the input.
+        source = source.replace(util.STX, "").replace(util.ETX, "")
+        # Convert CRLF and bare CR to LF, then append two newlines.
+        source = source.replace("\r\n", "\n").replace("\r", "\n") + "\n\n"
+        # Expand tabs to spaces using the instance's tab_length.
+        source = source.expandtabs(self.markdown.tab_length)
+        # Empty out lines that consist only of spaces.
+        source = re.sub(r'(?<=\n) +\n', '\n', source)
+        return source.split('\n')


 class HtmlBlockPreprocessor(Preprocessor):
    """Remove html blocks from the text and store them for later retrieval."""

    right_tag_patterns = ["</%s>", "%s>"]
+    attrs_pattern = r"""
+        \s+(?P<attr>[^>"'/= ]+)=(?P<q>['"])(?P<value>.*?)(?P=q)   # attr="value"
+        |                                                         # OR 
+        \s+(?P<attr1>[^>"'/= ]+)=(?P<value1>[^> ]+)               # attr=value
+        |                                                         # OR
+        \s+(?P<attr2>[^>"'/= ]+)                                  # attr
+        """
+    left_tag_pattern = r'^\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\s*\/?\>?' % attrs_pattern
+    attrs_re = re.compile(attrs_pattern, re.VERBOSE)
+    left_tag_re = re.compile(left_tag_pattern, re.VERBOSE)
+    markdown_in_raw = False

    def _get_left_tag(self, block):
-        return block[1:].replace(">", " ", 1).split()[0].lower()
+        """
+        Parse the opening tag at the start of `block`.
+
+        Returns a 3-tuple `(tag, length, attrs)`: the tag name, the
+        number of characters matched by the opening tag, and a dict of
+        its attributes (attributes without a value map to "").
+
+        When `left_tag_re` does not match, the tag is taken as the
+        lower-cased text between the first character and the first ">",
+        the length is `len(tag) + 2` and the attribute dict is empty.
+        """
+        m = self.left_tag_re.match(block)
+        if m:
+            tag = m.group('tag')
+            raw_attrs = m.group('attrs')
+            attrs = {}
+            if raw_attrs:
+                for ma in self.attrs_re.finditer(raw_attrs):
+                    # Each match fills exactly one alternative of
+                    # attrs_pattern: quoted, unquoted or bare attribute.
+                    if ma.group('attr'):
+                        if ma.group('value'):
+                            attrs[ma.group('attr').strip()] = ma.group('value')
+                        else:
+                            attrs[ma.group('attr').strip()] = ""
+                    elif ma.group('attr1'):
+                        if ma.group('value1'):
+                            attrs[ma.group('attr1').strip()] = ma.group('value1')
+                        else:
+                            attrs[ma.group('attr1').strip()] = ""
+                    elif ma.group('attr2'):
+                        attrs[ma.group('attr2').strip()] = ""
+            return tag, len(m.group(0)), attrs
+        else:
+            tag = block[1:].split(">", 1)[0].lower()
+            return tag, len(tag)+2, {}

-    def _get_right_tag(self, left_tag, block):
+    def _recursive_tagfind(self, ltag, rtag, start_index, block):
+        """
+        Find the `rtag` that closes the current `ltag`, skipping nested pairs.
+
+        Searches `block` from `start_index`.  Returns the index just past
+        the matching `rtag`, or -1 if no `rtag` is found (including when a
+        nested `ltag` has no `rtag` of its own).
+        """
+        while 1:
+            i = block.find(rtag, start_index)
+            if i == -1:
+                return -1
+            j = block.find(ltag, start_index) 
+            # if no ltag, or rtag found before another ltag, return index
+            if (j > i or j == -1):
+                return i + len(rtag)
+            # another ltag found before rtag, use end of ltag as starting
+            # point and search again
+            j = block.find('>', j)
+            start_index = self._recursive_tagfind(ltag, rtag, j + 1, block)
+            if start_index == -1:
+                # HTML potentially malformed - ltag has no corresponding
+                # rtag
+                return -1
+
+    def _get_right_tag(self, left_tag, left_index, block):
+        """
+        Locate the closing tag for `left_tag` in `block`.
+
+        Each pattern in `right_tag_patterns` is tried in turn, searching
+        from `left_index` with `_recursive_tagfind`.  On success returns
+        `(right_tag, index)`, where `right_tag` is the closing tag with
+        leading "<" and trailing ">" stripped and `index` is the position
+        just past it.  Otherwise returns a lower-cased slice taken from
+        the end of the right-stripped block, together with `len(block)`.
+        """
        for p in self.right_tag_patterns:
            tag = p % left_tag
-            i = block.rfind(tag)
+            i = self._recursive_tagfind("<%s" % left_tag, tag, left_index, block)
            if i > 2:
-                return tag.lstrip("<").rstrip(">"), i + len(p)-2 + len(left_tag)
-        return block.rstrip()[-len(left_tag)-2:-1].lower(), len(block)
+                return tag.lstrip("<").rstrip(">"), i
+        return block.rstrip()[-left_index:-1].lower(), len(block)
    
    def _equal_tags(self, left_tag, right_tag):
+        # Return True when right_tag is accepted as the closer of left_tag:
+        # left_tag starts with '?', '@' or '%'; right_tag is "/" + left_tag;
+        # or both are "--" (the comment marker).  Otherwise return False.
-        if left_tag == 'div' or left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
+        if left_tag[0] in ['?', '@', '%']: # handle PHP, etc.
            return True
        if ("/" + left_tag) == right_tag:
            return True
        if (right_tag == "--" and left_tag == "--"):
            return True
        elif left_tag == right_tag[1:] \
-            and right_tag[0] != "<":
+            and right_tag[0] == "/":
            return True
        else:
            return False
@ -108,7 +142,7 @@ class HtmlBlockPreprocessor(Preprocessor):
    def run(self, lines):
        text = "\n".join(lines)
        new_blocks = []
-        text = text.split("\n\n")
+        text = text.rsplit("\n\n")
        items = []
        left_tag = ''
        right_tag = ''
@ -124,15 +158,25 @@ class HtmlBlockPreprocessor(Preprocessor):
                block = block[1:]

            if not in_tag:
-                if block.startswith("<"):
-                    left_tag = self._get_left_tag(block)
-                    right_tag, data_index = self._get_right_tag(left_tag, block)
+                if block.startswith("<") and len(block.strip()) > 1:

-                    if data_index < len(block):
+                    if block[1] == "!":
+                        # is a comment block
+                        left_tag, left_index, attrs  = "--", 2, {}
+                    else:
+                        left_tag, left_index, attrs = self._get_left_tag(block)
+                    right_tag, data_index = self._get_right_tag(left_tag, 
+                                                                left_index,
+                                                                block)
+                    # keep checking conditions below and maybe just append
+                    
+                    if data_index < len(block) \
+                        and (util.isBlockLevel(left_tag)
+                        or left_tag == '--'): 
                        text.insert(0, block[data_index:])
                        block = block[:data_index]

-                    if not (markdown.isBlockLevel(left_tag) \
+                    if not (util.isBlockLevel(left_tag) \
                        or block[1] in ["!", "?", "@", "%"]):
                        new_blocks.append(block)
                        continue
@ -141,22 +185,27 @@ class HtmlBlockPreprocessor(Preprocessor):
                        new_blocks.append(block.strip())
                        continue

-                    if block[1] == "!":
-                        # is a comment block
-                        left_tag = "--"
-                        right_tag, data_index = self._get_right_tag(left_tag, block)
-                        # keep checking conditions below and maybe just append
-
                    if block.rstrip().endswith(">") \
                        and self._equal_tags(left_tag, right_tag):
-                        new_blocks.append(
-                            self.markdown.htmlStash.store(block.strip()))
+                        if self.markdown_in_raw and 'markdown' in attrs.keys():
+                            start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?', 
+                                           '', block[:left_index])
+                            end = block[-len(right_tag)-2:]
+                            block = block[left_index:-len(right_tag)-2]
+                            new_blocks.append(
+                                self.markdown.htmlStash.store(start))
+                            new_blocks.append(block)
+                            new_blocks.append(
+                                self.markdown.htmlStash.store(end))
+                        else:
+                            new_blocks.append(
+                                self.markdown.htmlStash.store(block.strip()))
                        continue
-                    else: #if not block[1] == "!":
+                    else: 
                        # if is block level tag and is not complete

-                        if markdown.isBlockLevel(left_tag) or left_tag == "--" \
-                        and not block.rstrip().endswith(">"):
+                        if util.isBlockLevel(left_tag) or left_tag == "--" \
+                            and not block.rstrip().endswith(">"):
                            items.append(block.strip())
                            in_tag = True
                        else:
@ -168,19 +217,52 @@ class HtmlBlockPreprocessor(Preprocessor):
                new_blocks.append(block)

            else:
-                items.append(block.strip())
+                items.append(block)

-                right_tag, data_index = self._get_right_tag(left_tag, block)
+                right_tag, data_index = self._get_right_tag(left_tag, 0, block)

                if self._equal_tags(left_tag, right_tag):
                    # if find closing tag
+                    
+                    if data_index < len(block):
+                        # we have more text after right_tag
+                        items[-1] = block[:data_index]
+                        text.insert(0, block[data_index:])
+
                    in_tag = False
-                    new_blocks.append(
-                        self.markdown.htmlStash.store('\n\n'.join(items)))
+                    if self.markdown_in_raw and 'markdown' in attrs.keys():
+                        start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?', 
+                                       '', items[0][:left_index])
+                        items[0] = items[0][left_index:]
+                        end = items[-1][-len(right_tag)-2:]
+                        items[-1] = items[-1][:-len(right_tag)-2]
+                        new_blocks.append(
+                            self.markdown.htmlStash.store(start))
+                        new_blocks.extend(items)
+                        new_blocks.append(
+                            self.markdown.htmlStash.store(end))
+                    else:
+                        new_blocks.append(
+                            self.markdown.htmlStash.store('\n\n'.join(items)))
                    items = []

        if items:
-            new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
+            if self.markdown_in_raw and 'markdown' in attrs.keys():
+                start = re.sub(r'\smarkdown(=[\'"]?[^> ]*[\'"]?)?', 
+                               '', items[0][:left_index])
+                items[0] = items[0][left_index:]
+                end = items[-1][-len(right_tag)-2:]
+                items[-1] = items[-1][:-len(right_tag)-2]
+                new_blocks.append(
+                    self.markdown.htmlStash.store(start))
+                new_blocks.extend(items)
+                if end.strip():
+                    new_blocks.append(
+                        self.markdown.htmlStash.store(end))
+            else:
+                new_blocks.append(
+                    self.markdown.htmlStash.store('\n\n'.join(items)))
+            #new_blocks.append(self.markdown.htmlStash.store('\n\n'.join(items)))
            new_blocks.append('\n')

        new_text = "\n\n".join(new_blocks)
@ -190,24 +272,26 @@ class HtmlBlockPreprocessor(Preprocessor):
 class ReferencePreprocessor(Preprocessor):
    """ Remove reference definitions from text and store for later use. """

-    RE = re.compile(r'^(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)$', re.DOTALL)
+    TITLE = r'[ ]*(\"(.*)\"|\'(.*)\'|\((.*)\))[ ]*'
+    RE = re.compile(r'^[ ]{0,3}\[([^\]]*)\]:\s*([^ ]*)[ ]*(%s)?$' % TITLE, re.DOTALL)
+    TITLE_RE = re.compile(r'^%s$' % TITLE)

    def run (self, lines):
        new_text = [];
-        for line in lines:
+        while lines:
+            line = lines.pop(0)
            m = self.RE.match(line)
            if m:
-                id = m.group(2).strip().lower()
-                t = m.group(4).strip()  # potential title
+                id = m.group(1).strip().lower()
+                link = m.group(2).lstrip('<').rstrip('>')
+                t = m.group(5) or m.group(6) or m.group(7)
                if not t:
-                    self.markdown.references[id] = (m.group(3), t)
-                elif (len(t) >= 2
-                      and (t[0] == t[-1] == "\""
-                           or t[0] == t[-1] == "\'"
-                           or (t[0] == "(" and t[-1] == ")") ) ):
-                    self.markdown.references[id] = (m.group(3), t[1:-1])
-                else:
-                    new_text.append(line)
+                    # Check next line for title
+                    tm = self.TITLE_RE.match(lines[0])
+                    if tm:
+                        lines.pop(0)
+                        t = tm.group(2) or tm.group(3) or tm.group(4)
+                self.markdown.references[id] = (link, t)
            else:
                new_text.append(line)

--- a/src/calibre/ebooks/markdown/serializers.py
+++ b/src/calibre/ebooks/markdown/serializers.py
@ -1,6 +1,6 @@
-# markdown/html4.py
+# markdown/serializers.py
 #
-# Add html4 serialization to older versions of Elementree
+# Add x/html serialization to ElementTree
 # Taken from ElementTree 1.3 preview with slight modifications
 #
 # Copyright (c) 1999-2007 by Fredrik Lundh.  All rights reserved.
@ -37,12 +37,19 @@
 # --------------------------------------------------------------------


-import markdown
-ElementTree = markdown.etree.ElementTree
-QName = markdown.etree.QName
-Comment = markdown.etree.Comment
-PI = markdown.etree.PI
-ProcessingInstruction = markdown.etree.ProcessingInstruction
+from __future__ import absolute_import
+from __future__ import unicode_literals
+from . import util
+ElementTree = util.etree.ElementTree
+QName = util.etree.QName
+if hasattr(util.etree, 'test_comment'):
+    Comment = util.etree.test_comment
+else:
+    Comment = util.etree.Comment
+PI = util.etree.PI
+ProcessingInstruction = util.etree.ProcessingInstruction
+
+__all__ = ['to_html_string', 'to_xhtml_string']

 # Tags serialized without a closing tag (and as "<tag />" in xhtml output).
 # Every name needs its own comma: adjacent string literals are concatenated,
 # which previously merged "meta" and "param" into one entry, "metaparam".
 HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")
@ -77,7 +84,7 @@ def _encode(text, encoding):
    except (TypeError, AttributeError):
        _raise_serialization_error(text)

-def _escape_cdata(text, encoding):
+def _escape_cdata(text):
    # escape character data
    try:
        # it's worth avoiding do-nothing calls for strings that are
@ -89,12 +96,12 @@ def _escape_cdata(text, encoding):
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
-        return text.encode(encoding, "xmlcharrefreplace")
+        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


-def _escape_attrib(text, encoding):
+def _escape_attrib(text):
    # escape attribute value
    try:
        if "&" in text:
@ -107,38 +114,40 @@ def _escape_attrib(text, encoding):
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
-        return text.encode(encoding, "xmlcharrefreplace")
+        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)

-def _escape_attrib_html(text, encoding):
+def _escape_attrib_html(text):
    # escape attribute value
+    # Replaces "&", "<", ">" and double quotes with their HTML entities and
+    # returns the resulting text.  "&" is handled first so the entities
+    # added afterwards are not escaped a second time.  A TypeError or
+    # AttributeError (e.g. a non-string value) is routed to
+    # _raise_serialization_error.
    try:
        if "&" in text:
            text = text.replace("&", "&amp;")
+        if "<" in text:
+            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
-        return text.encode(encoding, "xmlcharrefreplace")
+        return text
    except (TypeError, AttributeError):
        _raise_serialization_error(text)


-def _serialize_html(write, elem, encoding, qnames, namespaces):
+def _serialize_html(write, elem, qnames, namespaces, format):
    tag = elem.tag
    text = elem.text
    if tag is Comment:
-        write("<!--%s-->" % _escape_cdata(text, encoding))
+        write("<!--%s-->" % _escape_cdata(text))
    elif tag is ProcessingInstruction:
-        write("<?%s?>" % _escape_cdata(text, encoding))
+        write("<?%s?>" % _escape_cdata(text))
    else:
        tag = qnames[tag]
        if tag is None:
            if text:
-                write(_escape_cdata(text, encoding))
+                write(_escape_cdata(text))
            for e in elem:
-                _serialize_html(write, e, encoding, qnames, None)
+                _serialize_html(write, e, qnames, None, format)
        else:
            write("<" + tag)
            items = elem.items()
@ -150,54 +159,55 @@ def _serialize_html(write, elem, encoding, qnames, namespaces):
                    if isinstance(v, QName):
                        v = qnames[v.text]
                    else:
-                        v = _escape_attrib_html(v, encoding)
-                    # FIXME: handle boolean attributes
-                    write(" %s=\"%s\"" % (qnames[k], v))
+                        v = _escape_attrib_html(v)
+                    if qnames[k] == v and format == 'html':
+                        # handle boolean attributes
+                        write(" %s" % v)
+                    else:
+                        write(" %s=\"%s\"" % (qnames[k], v))
                if namespaces:
                    items = namespaces.items()
                    items.sort(key=lambda x: x[1]) # sort on prefix
                    for v, k in items:
                        if k:
                            k = ":" + k
-                        write(" xmlns%s=\"%s\"" % (
-                            k.encode(encoding),
-                            _escape_attrib(v, encoding)
-                            ))
-            write(">")
-            tag = tag.lower()
-            if text:
-                if tag == "script" or tag == "style":
-                    write(_encode(text, encoding))
-                else:
-                    write(_escape_cdata(text, encoding))
-            for e in elem:
-                _serialize_html(write, e, encoding, qnames, None)
-            if tag not in HTML_EMPTY:
-                write("</" + tag + ">")
+                        write(" xmlns%s=\"%s\"" % (k, _escape_attrib(v)))
+            if format == "xhtml" and tag in HTML_EMPTY:
+                write(" />")
+            else:
+                write(">")
+                tag = tag.lower()
+                if text:
+                    if tag == "script" or tag == "style":
+                        write(text)
+                    else:
+                        write(_escape_cdata(text))
+                for e in elem:
+                    _serialize_html(write, e, qnames, None, format)
+                if tag not in HTML_EMPTY:
+                    write("</" + tag + ">")
    if elem.tail:
-        write(_escape_cdata(elem.tail, encoding))
+        write(_escape_cdata(elem.tail))

-def write_html(root, f,
-          # keyword arguments
-          encoding="us-ascii",
-          default_namespace=None):
+def _write_html(root,
+                encoding=None,
+                default_namespace=None,
+                format="html"):
+    """
+    Serialize the element `root` and return the result.
+
+    * root: the element to serialize; must not be None.
+    * encoding: when None (the default) the joined text is returned
+      as is; otherwise it is passed through `_encode` with this encoding.
+    * default_namespace: forwarded to `_namespaces`.
+    * format: "html" or "xhtml"; forwarded to `_serialize_html`.
+    """
    assert root is not None
-    if not hasattr(f, "write"):
-        f = open(f, "wb")
-    write = f.write
-    if not encoding:
-        encoding = "us-ascii"
-    qnames, namespaces = _namespaces(
-            root, encoding, default_namespace
-            )
-    _serialize_html(
-                write, root, encoding, qnames, namespaces
-                )
+    data = []
+    write = data.append
+    qnames, namespaces = _namespaces(root, default_namespace)
+    _serialize_html(write, root, qnames, namespaces, format)
+    if encoding is None:
+        return "".join(data)
+    else:
+        # _encode takes (text, encoding); leaving the encoding out raised
+        # TypeError whenever a caller asked for encoded output.
+        return _encode("".join(data), encoding)
+

 # --------------------------------------------------------------------
 # serialization support

-def _namespaces(elem, encoding, default_namespace=None):
+def _namespaces(elem, default_namespace=None):
    # identify namespaces used in this tree

    # maps qnames to *encoded* prefix:local names
@ -208,9 +218,6 @@ def _namespaces(elem, encoding, default_namespace=None):
    if default_namespace:
        namespaces[default_namespace] = ""

-    def encode(text):
-        return text.encode(encoding)
-
    def add_qname(qname):
        # calculate serialized qname representation
        try:
@ -224,17 +231,16 @@ def _namespaces(elem, encoding, default_namespace=None):
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
-                    qnames[qname] = encode("%s:%s" % (prefix, tag))
+                    qnames[qname] = "%s:%s" % (prefix, tag)
                else:
-                    qnames[qname] = encode(tag) # default element
+                    qnames[qname] = tag # default element
            else:
                if default_namespace:
-                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
-                qnames[qname] = encode(qname)
+                qnames[qname] = qname
        except TypeError:
            _raise_serialization_error(qname)

@ -247,7 +253,7 @@ def _namespaces(elem, encoding, default_namespace=None):
        tag = elem.tag
        if isinstance(tag, QName) and tag.text not in qnames:
            add_qname(tag.text)
-        elif isinstance(tag, basestring):
+        elif isinstance(tag, util.string_type):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
@ -264,11 +270,8 @@ def _namespaces(elem, encoding, default_namespace=None):
            add_qname(text.text)
    return qnames, namespaces

-def to_html_string(element, encoding=None):
-    class dummy:
-        pass
-    data = []
-    file = dummy()
-    file.write = data.append
-    write_html(ElementTree(element).getroot(),file,encoding)
-    return "".join(data)
+def to_html_string(element):
+    """ Serialize `element` with `_write_html` using the "html" format. """
+    return _write_html(ElementTree(element).getroot(), format="html")
+
+def to_xhtml_string(element):
+    """ Serialize `element` with `_write_html` using the "xhtml" format. """
+    return _write_html(ElementTree(element).getroot(), format="xhtml")
--- a/src/calibre/ebooks/markdown/treeprocessors.py
+++ b/src/calibre/ebooks/markdown/treeprocessors.py
@ -1,16 +1,26 @@
-import markdown
-import re
+from __future__ import unicode_literals
+from __future__ import absolute_import
+from . import util
+from . import odict
+from . import inlinepatterns
+
+
+def build_treeprocessors(md_instance, **kwargs):
+    """ Build the default treeprocessors for Markdown.
+
+    Returns an `odict.OrderedDict` holding an `InlineProcessor` under
+    "inline" followed by a `PrettifyTreeprocessor` under "prettify".
+    `kwargs` is accepted but not used here.
+    """
+    treeprocessors = odict.OrderedDict()
+    treeprocessors["inline"] = InlineProcessor(md_instance)
+    treeprocessors["prettify"] = PrettifyTreeprocessor(md_instance)
+    return treeprocessors
+

 def isString(s):
    """ Check if it's string """
-    return isinstance(s, unicode) or isinstance(s, str)
+    # An AtomicString is deliberately reported as "not a string"; any
+    # other value is a string only if it is a util.string_type instance.
+    if not isinstance(s, util.AtomicString):
+        return isinstance(s, util.string_type)
+    return False

-class Processor:
-    def __init__(self, markdown_instance=None):
-        if markdown_instance:
-            self.markdown = markdown_instance

-class Treeprocessor(Processor):
+class Treeprocessor(util.Processor):
    """
    Treeprocessors are run on the ElementTree object before serialization.

@ -36,18 +46,18 @@ class InlineProcessor(Treeprocessor):
    A Treeprocessor that traverses a tree, applying inline patterns.
    """

-    def __init__ (self, md):
-        self.__placeholder_prefix = markdown.INLINE_PLACEHOLDER_PREFIX
-        self.__placeholder_suffix = markdown.ETX
+    def __init__(self, md):
+        # Cache the placeholder prefix/suffix, total length and regex.
+        # The constant 4 is the width of the zero-padded id that sits
+        # between prefix and suffix (ids are formatted with "%04d").
+        self.__placeholder_prefix = util.INLINE_PLACEHOLDER_PREFIX
+        self.__placeholder_suffix = util.ETX
        self.__placeholder_length = 4 + len(self.__placeholder_prefix) \
                                      + len(self.__placeholder_suffix)
-        self.__placeholder_re = re.compile(markdown.INLINE_PLACEHOLDER % r'([0-9]{4})')
+        self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
        self.markdown = md

    def __makePlaceholder(self, type):
        """ Generate a placeholder """
+        # Returns (placeholder, id).  The id is the current number of
+        # stashed nodes, zero-padded to four digits.  `type` is not used
+        # in this method body.
        id = "%04d" % len(self.stashed_nodes)
-        hash = markdown.INLINE_PLACEHOLDER % id
+        hash = util.INLINE_PLACEHOLDER % id
        return hash, id

    def __findPlaceholder(self, data, index):
@ -60,8 +70,8 @@ class InlineProcessor(Treeprocessor):
        * index: index, from which we start search

        Returns: placeholder id and string index, after the found placeholder.
-        """
        
+        """
        m = self.__placeholder_re.search(data, index)
        if m:
            return m.group(1), m.end()
@ -87,7 +97,7 @@ class InlineProcessor(Treeprocessor):
        Returns: String with placeholders.

        """
-        if not isinstance(data, markdown.AtomicString):
+        if not isinstance(data, util.AtomicString):
            startIndex = 0
            while patternIndex < len(self.markdown.inlinePatterns):
                data, matched, startIndex = self.__applyPattern(
@ -140,6 +150,7 @@ class InlineProcessor(Treeprocessor):
        * parent: Element, which contains processing inline data

        Returns: list with ElementTree elements with applied inline patterns.
+        
        """
        def linkText(text):
            if text:
@ -153,7 +164,6 @@ class InlineProcessor(Treeprocessor):
                        parent.text += text
                    else:
                        parent.text = text
-
        result = []
        strartIndex = 0
        while data:
@ -172,7 +182,7 @@ class InlineProcessor(Treeprocessor):
                        for child in [node] + node.getchildren():
                            if child.tail:
                                if child.tail.strip():
-                                    self.__processElementText(node, child, False)
+                                    self.__processElementText(node, child,False)
                            if child.text:
                                if child.text.strip():
                                    self.__processElementText(child, child)
@ -190,6 +200,9 @@ class InlineProcessor(Treeprocessor):
                    strartIndex = end
            else:
                text = data[strartIndex:]
+                if isinstance(data, util.AtomicString):
+                    # We don't want to lose the AtomicString
+                    text = util.AtomicString(text)
                linkText(text)
                data = ""

@ -205,7 +218,7 @@ class InlineProcessor(Treeprocessor):
        * data: the text to be processed
        * pattern: the pattern to be checked
        * patternIndex: index of current pattern
-        * startIndex: string index, from which we starting search
+        * startIndex: string index, from which we start searching

        Returns: String with placeholders instead of ElementTree elements.

@ -219,10 +232,10 @@ class InlineProcessor(Treeprocessor):
        node = pattern.handleMatch(match)

        if node is None:
-            return data, True, len(leftData) + match.span(len(match.groups()))[0]
+            return data, True, len(leftData)+match.span(len(match.groups()))[0]

        if not isString(node):
-            if not isinstance(node.text, markdown.AtomicString):
+            if not isinstance(node.text, util.AtomicString):
                # We need to process current node too
                for child in [node] + node.getchildren():
                    if not isString(node):
@ -244,14 +257,14 @@ class InlineProcessor(Treeprocessor):

        Iterate over ElementTree, find elements with inline tag, apply inline
        patterns and append newly created Elements to tree.  If you don't
-        want process your data with inline paterns, instead of normal string,
+        want to process your data with inline paterns, instead of normal string,
        use subclass AtomicString:

-            node.text = markdown.AtomicString("data won't be processed with inline patterns")
+            node.text = markdown.AtomicString("This will not be processed.")

        Arguments:

-        * markdownTree: ElementTree object, representing Markdown tree.
+        * tree: ElementTree object, representing Markdown tree.

        Returns: ElementTree object with applied inline patterns.

@ -264,33 +277,46 @@ class InlineProcessor(Treeprocessor):
            currElement = stack.pop()
            insertQueue = []
            for child in currElement.getchildren():
-                if child.text and not isinstance(child.text, markdown.AtomicString):
+                if child.text and not isinstance(child.text, util.AtomicString):
                    text = child.text
                    child.text = None
                    lst = self.__processPlaceholders(self.__handleInline(
                                                    text), child)
                    stack += lst
                    insertQueue.append((child, lst))
-
+                if child.tail:
+                    tail = self.__handleInline(child.tail)
+                    dumby = util.etree.Element('d')
+                    tailResult = self.__processPlaceholders(tail, dumby)
+                    if dumby.text:
+                        child.tail = dumby.text
+                    else:
+                        child.tail = None
+                    pos = currElement.getchildren().index(child) + 1
+                    tailResult.reverse()
+                    for newChild in tailResult:
+                        currElement.insert(pos, newChild)
                if child.getchildren():
                    stack.append(child)

            for element, lst in insertQueue:
-                if element.text:
-                    element.text = \
-                        markdown.inlinepatterns.handleAttributes(element.text,
-                                                                 element)
+                if self.markdown.enable_attributes:
+                    if element.text and isString(element.text):
+                        element.text = \
+                            inlinepatterns.handleAttributes(element.text, 
+                                                                    element)
                i = 0
                for newChild in lst:
-                    # Processing attributes
-                    if newChild.tail:
-                        newChild.tail = \
-                            markdown.inlinepatterns.handleAttributes(newChild.tail,
-                                                                     element)
-                    if newChild.text:
-                        newChild.text = \
-                            markdown.inlinepatterns.handleAttributes(newChild.text,
-                                                                     newChild)
+                    if self.markdown.enable_attributes:
+                        # Processing attributes
+                        if newChild.tail and isString(newChild.tail):
+                            newChild.tail = \
+                                inlinepatterns.handleAttributes(newChild.tail,
+                                                                    element)
+                        if newChild.text and isString(newChild.text):
+                            newChild.text = \
+                                inlinepatterns.handleAttributes(newChild.text,
+                                                                    newChild)
                    element.insert(i, newChild)
                    i += 1
        return tree
@ -303,12 +329,12 @@ class PrettifyTreeprocessor(Treeprocessor):
        """ Recursively add linebreaks to ElementTree children. """

        i = "\n"
-        if markdown.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
+        if util.isBlockLevel(elem.tag) and elem.tag not in ['code', 'pre']:
            if (not elem.text or not elem.text.strip()) \
-                    and len(elem) and markdown.isBlockLevel(elem[0].tag):
+                    and len(elem) and util.isBlockLevel(elem[0].tag):
                elem.text = i
            for e in elem:
-                if markdown.isBlockLevel(e.tag):
+                if util.isBlockLevel(e.tag):
                    self._prettifyETree(e)
            if not elem.tail or not elem.tail.strip():
                elem.tail = i
@ -327,3 +353,8 @@ class PrettifyTreeprocessor(Treeprocessor):
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
+        # Clean up extra empty lines at end of code blocks.
+        pres = root.getiterator('pre')
+        for pre in pres:
+            if len(pre) and pre[0].tag == 'code':
+                pre[0].text = pre[0].text.rstrip() + '\n'
--- a/src/calibre/ebooks/markdown/util.py
+++ b/src/calibre/ebooks/markdown/util.py
@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+import re
+import sys
+
+
+"""
+Python 3 Stuff
+=============================================================================
+"""
+# True when the running interpreter reports major version 3.
+PY3 = sys.version_info[0] == 3
+
+# Version-neutral aliases for the string types and the code-point-to-character
+# function.  A conditional expression only evaluates the branch it selects, so
+# the Python 2 only builtins are never looked up when PY3 is true.
+string_type, text_type, int2str = (
+    (str, str, chr) if PY3 else (basestring, unicode, unichr)
+)
+
+
+"""
+Constants you might want to modify
+-----------------------------------------------------------------------------
+"""
+
+# Anchored (^...$), case-insensitive pattern listing the tag names that are
+# treated as block level.  Note that both "hr" and "hr/" are listed.
+BLOCK_LEVEL_ELEMENTS = re.compile("^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
+                                  "|script|noscript|form|fieldset|iframe|math"
+                                  "|hr|hr/|style|li|dt|dd|thead|tbody"
+                                  "|tr|th|td|section|footer|header|group|figure"
+                                  "|figcaption|aside|article|canvas|output"
+                                  "|progress|video)$", re.IGNORECASE)
+# Placeholders
+STX = '\u0002'  # Use STX ("Start of text") for start-of-placeholder
+ETX = '\u0003'  # Use ETX ("End of text") for end-of-placeholder
+# An inline placeholder has the form STX + "klzzwxh:" + <id> + ETX.
+INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
+INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
+# Matches an inline placeholder and captures its id in group 1; the id must
+# be exactly four decimal digits.
+INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
+# NOTE(review): presumably stands in for a literal ampersand while text is
+# being processed -- confirm against the code that uses it.
+AMP_SUBSTITUTE = STX+"amp"+ETX
+
+"""
+Constants you probably do not need to change
+-----------------------------------------------------------------------------
+"""
+
+# (first, last) character pairs delimiting blocks of right-to-left scripts.
+RTL_BIDI_RANGES = (
+    # Hebrew (0590-05FF), Arabic (0600-06FF), Syriac (0700-074F),
+    # Arabic supplement (0750-077F), Thaana (0780-07BF), Nko (07C0-07FF).
+    ('\u0590', '\u07FF'),
+    # Tifinagh
+    ('\u2D30', '\u2D7F'),
+)
+
+# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
+# markdown.util import etree`).  Do not import it by yourself.
+
+# Bind `etree` to the C implementation of ElementTree when it can be imported
+# and passes the version check below; otherwise fall back to the pure-Python
+# module.
+try: # Is the C implementation of ElementTree available?
+    import xml.etree.cElementTree as etree
+    from xml.etree.ElementTree import Comment
+    # Serializers (including ours) test with non-c Comment
+    # (test_comment is only attached on this C path, not in the fallback.)
+    etree.test_comment = Comment
+    # NOTE: VERSION is compared against a string literal, so the ordering is
+    # lexicographic rather than numeric.
+    if etree.VERSION < "1.0.5":
+        raise RuntimeError("cElementTree version 1.0.5 or higher is required.")
+except (ImportError, RuntimeError):
+    # Use the Python implementation of ElementTree?
+    import xml.etree.ElementTree as etree
+    # This RuntimeError is raised inside the except handler, so nothing here
+    # catches it and it propagates to whoever imports this module.
+    if etree.VERSION < "1.1":
+        raise RuntimeError("ElementTree version 1.1 or higher is required")
+
+
+"""
+AUXILIARY GLOBAL FUNCTIONS
+=============================================================================
+"""
+
+
+def isBlockLevel(tag):
+    """Tell whether `tag` names a block level HTML element.
+
+    For a string tag the result of matching it against BLOCK_LEVEL_ELEMENTS
+    is returned (a match object, or None when it does not match).  Any other
+    kind of tag yields False.
+    """
+    if not isinstance(tag, string_type):
+        # Some ElementTree tags are not strings; those are never block level.
+        return False
+    return BLOCK_LEVEL_ELEMENTS.match(tag)
+
+"""
+MISC AUXILIARY CLASSES
+=============================================================================
+"""
+
+class AtomicString(text_type):
+    """Marker text type: a string that must not be processed any further.
+
+    It adds nothing to the text type it derives from; code that wants to
+    leave such text alone recognises it with an isinstance() check.
+    """
+
+
+class Processor(object):
+    """Base object that can remember the Markdown instance it belongs to."""
+
+    def __init__(self, markdown_instance=None):
+        """Keep `markdown_instance` as `self.markdown` when one is supplied.
+
+        When the argument is missing (or otherwise falsy) the `markdown`
+        attribute is not created at all.
+        """
+        if not markdown_instance:
+            return
+        self.markdown = markdown_instance
+
+
+class HtmlStash(object):
+    """
+    Holds HTML segments that were pulled out of the document, handing back
+    a place-holder string for each one so that it can be reinserted later.
+    """
+
+    def __init__(self):
+        """ Start out with an empty stash. """
+        self.rawHtmlBlocks = []
+        self.html_counter = 0  # number of segments stored so far
+
+    def store(self, html, safe=False):
+        """
+        Stash an HTML segment for later reinsertion and return the
+        place-holder string that has to be put into the document instead.
+
+        Keyword arguments:
+
+        * html: an html segment
+        * safe: label an html segment as safe for safemode
+
+        Returns : a placeholder string
+
+        """
+        entry = (html, safe)
+        self.rawHtmlBlocks.append(entry)
+        # The place-holder is keyed on the counter value before it is bumped.
+        token = self.get_placeholder(self.html_counter)
+        self.html_counter += 1
+        return token
+
+    def reset(self):
+        """ Drop every stored segment and restart the counter at zero. """
+        self.rawHtmlBlocks = []
+        self.html_counter = 0
+
+    def get_placeholder(self, key):
+        """ Build the place-holder string for the segment numbered `key`. """
+        template = "%swzxhzdk:%d%s"
+        return template % (STX, key, ETX)
+
--- a/src/calibre/ebooks/txt/processor.py
+++ b/src/calibre/ebooks/txt/processor.py
@ -97,9 +97,9 @@ def convert_basic(txt, title='', epub_split_size_kb=0):

 def convert_markdown(txt, title='', extensions=('footnotes', 'tables', 'toc')):
+    """
+    Convert Markdown text to HTML and wrap it in HTML_TEMPLATE.
+
+    The requested extension names are lower-cased and only those present in
+    MD_EXTENSIONS are passed on to Markdown, which is created with
+    safe_mode=False.  Returns HTML_TEMPLATE filled in with `title` and the
+    converted `txt`.
+    """
    from calibre.ebooks.conversion.plugins.txt_input import MD_EXTENSIONS
-    from calibre.ebooks.markdown import markdown
+    from calibre.ebooks.markdown import Markdown
    extensions = [x.lower() for x in extensions if x.lower() in MD_EXTENSIONS]
-    md = markdown.Markdown(
+    md = Markdown(
          extensions,
          safe_mode=False)
    return HTML_TEMPLATE % (title, md.convert(txt))