Implement support for markdown in txt2lrf. Fix handling of <pre>, <th> elements in html2lrf

2025-07-07 10:14:46 -04:00 · 2007-06-19 15:13:52 +00:00 · 2007-06-19 15:13:52 +00:00 · 9178ddf18e
commit 9178ddf18e
parent a082131823
13 changed files with 2277 additions and 105 deletions
--- a/setup.py
+++ b/setup.py
@ -26,6 +26,7 @@ entry_points = {
                             'rtf-meta = libprs500.ebooks.metadata.rtf:main', \
                             'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main', \
                             'html2lrf = libprs500.ebooks.lrf.html.convert_from:main',\
+                             'markdown = libprs500.ebooks.markdown.markdown:main',\
                           ], 
        'gui_scripts'    : [ APPNAME+' = libprs500.gui.main:main']
      }
--- a/src/libprs500/init.py
+++ b/src/libprs500/init.py
@ -13,7 +13,7 @@
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 ''' E-book management software'''
-__version__   = "0.3.52"
+__version__   = "0.3.53"
 __docformat__ = "epytext"
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 __appname__   = 'libprs500'
--- a/src/libprs500/ebooks/lrf/init.py
+++ b/src/libprs500/ebooks/lrf/init.py
@ -73,6 +73,16 @@ def option_parser(usage):
    profiles=['prs500'] 
    parser.add_option('-o', '--output', action='store', default=None, \
                      help='Output file name. Default is derived from input filename')
+    laf = parser.add_option_group('LOOK AND FEEL')
+    laf.add_option('--cover', action='store', dest='cover', default=None, \
+                      help='Path to file containing image to be used as cover')
+    laf.add_option('--font-delta', action='store', type='float', default=0., \
+                      help="""Increase the font size by 2 * FONT_DELTA pts and """
+                      '''the line spacing by FONT_DELTA pts. FONT_DELTA can be a fraction.'''
+                      """If FONT_DELTA is negative, the font size is decreased.""",
+                      dest='font_delta')
+    laf.add_option('--disable-autorotation', action='store_true', default=False, 
+                   help='Disable autorotation of images.', dest='disable_autorotation')
    page = parser.add_option_group('PAGE OPTIONS')
    page.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice',
                      choices=profiles, action='callback', callback=profile_from_string,
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@ -221,6 +221,7 @@ class HTMLConverter(object):
                 chapter_regex=re.compile('chapter|book|appendix', re.IGNORECASE),
                 link_exclude=re.compile('$'), 
                 page_break=re.compile('h[12]', re.IGNORECASE),
+                 force_page_break=re.compile('$', re.IGNORECASE),
                 profile=PRS500_PROFILE,
                 disable_autorotation=False):
        '''
@ -273,7 +274,8 @@ class HTMLConverter(object):
            small  = {'font-size'   :'small'},
            pre    = {'font-family' :'monospace' },
            tt     = {'font-family' :'monospace'},
-            center = {'text-align'  : 'center'}
+            center = {'text-align'  : 'center'},
+            th     = {'font-size':'large', 'font-weight':'bold'},
            )        
        self.profile     = profile #: Defines the geometry of the display device
        self.chapter_detection = chapter_detection #: Flag to toggle chapter detection
@ -287,7 +289,8 @@ class HTMLConverter(object):
        self.blockquote_style = book.create_block_style(sidemargin=60, 
                                                        topskip=20, footskip=20)
        self.unindented_style = book.create_text_style(parindent=0)
-        self.page_break       = page_break #: Regex controlling forced page-break behavior
+        self.page_break       = page_break #: Regex controlling page-break behavior
+        self.force_page_break = force_page_break #: Regex controlling forced page-break behavior
        self.text_styles      = []#: Keep track of already used textstyles
        self.block_styles     = []#: Keep track of already used blockstyles
        self.images  = {}         #: Images referenced in the HTML document
@ -559,6 +562,7 @@ class HTMLConverter(object):
                                     chapter_regex=self.chapter_regex,
                                     link_exclude=self.link_exclude,
                                     page_break=self.page_break,
+                                     force_page_break=self.force_page_break,
                                     disable_autorotation=self.disable_autorotation)
                        HTMLConverter.processed_files[path] = self.files[path]
                    except Exception:
@ -829,6 +833,9 @@ class HTMLConverter(object):
           tag_css['page-break-after'].lower() != 'avoid':
            end_page = True
            tag_css.pop('page-break-after')
+        if self.force_page_break.match(tagname):
+            self.end_page()
+            self.page_break_found = True
        if not self.page_break_found and self.page_break.match(tagname):
            if len(self.current_page.contents) > 3:
                self.end_page()
@ -956,6 +963,7 @@ class HTMLConverter(object):
                except ConversionError:
                    pass
            self.end_current_block()
+            self.current_block = self.book.create_text_block()
        elif tagname in ['ul', 'ol']:
            self.in_ol = 1 if tagname == 'ol' else 0
            self.end_current_block()
@ -1138,13 +1146,15 @@ def process_file(path, options):
             re.compile('$')
        pb = re.compile(options.page_break, re.IGNORECASE) if options.page_break else \
             re.compile('$')
+        fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \
+             re.compile('$')
        conv = HTMLConverter(book, path, profile=options.profile,
                             font_delta=options.font_delta, 
                             cover=cpath, max_link_levels=options.link_levels,
                             verbose=options.verbose, baen=options.baen, 
                             chapter_detection=options.chapter_detection,
                             chapter_regex=re.compile(options.chapter_regex, re.IGNORECASE),
-                             link_exclude=re.compile(le), page_break=pb,
+                             link_exclude=re.compile(le), page_break=pb, force_page_break=fpb,
                             disable_autorotation=options.disable_autorotation)
        conv.process_links()
        oname = options.output
@ -1220,23 +1230,14 @@ def try_opf(path, options):
                
            

-def parse_options(argv=None, cli=True):
+def parse_options(argv=None, cli=True, parser=None):
    """ CLI for html -> lrf conversions """
    if not argv:
        argv = sys.argv[1:]
+    if not parser:
        parser = option_parser("""usage: %prog [options] mybook.[html|rar|zip]

         %prog converts mybook.html to mybook.lrf""")
-    laf = parser.add_option_group('LOOK AND FEEL')
-    laf.add_option('--cover', action='store', dest='cover', default=None, \
-                      help='Path to file containing image to be used as cover')
-    laf.add_option('--font-delta', action='store', type='float', default=0., \
-                      help="""Increase the font size by 2 * FONT_DELTA pts and """
-                      '''the line spacing by FONT_DELTA pts. FONT_DELTA can be a fraction.'''
-                      """If FONT_DELTA is negative, the font size is decreased.""",
-                      dest='font_delta')
-    laf.add_option('--disable-autorotation', action='store_true', default=False, 
-                   help='Disable autorotation of images.', dest='disable_autorotation')
    link = parser.add_option_group('LINK PROCESSING OPTIONS')
    link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
                      dest='link_levels',
@ -1265,6 +1266,8 @@ def parse_options(argv=None, cli=True):
                      '''there are no really long pages as this degrades the page '''
                      '''turn performance of the LRF. Thus this option is ignored '''
                      '''if the current page has only a few elements.''')
+    chapter.add_option('--force-page-break-before', dest='force_page_break',
+                       default='$', help='Like --page-break-before, but page breaks are forced.')
    prepro = parser.add_option_group('PREPROCESSING OPTIONS')
    prepro.add_option('--baen', action='store_true', default=False, dest='baen',
                      help='''Preprocess Baen HTML files to improve generated LRF.''')
@ -1285,7 +1288,8 @@ def main():
        if options.verbose:
            import warnings
            warnings.defaultaction = 'error'
-    except:        
+    except Exception, err:
+        print >> sys.stderr, err
        sys.exit(1)    
    process_file(src, options)

--- a/src/libprs500/ebooks/lrf/html/table.py
+++ b/src/libprs500/ebooks/lrf/html/table.py
@ -12,7 +12,7 @@
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-import math, sys
+import math, sys, re

 from libprs500.ebooks.lrf.fonts import get_font
 from libprs500.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
@ -215,7 +215,7 @@ class Row(object):
    def __init__(self, conv, row, css, colpad):
        self.cells = []
        self.colpad = colpad
-        cells = row.findAll('td')
+        cells = row.findAll(re.compile('td|th'))
        for cell in cells:
            ccss = conv.tag_css(cell, css)
            self.cells.append(Cell(conv, cell, ccss))        
--- a/src/libprs500/ebooks/lrf/txt/convert_from.py
+++ b/src/libprs500/ebooks/lrf/txt/convert_from.py
@ -15,19 +15,17 @@
 """
 Convert .txt files to .lrf
 """
-import os, sys
+import os, sys, codecs

-from libprs500.ebooks import BeautifulSoup
-from libprs500.ebooks.lrf import ConversionError, option_parser
-from libprs500.ebooks.lrf import Book
-from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, Italic, Bold, BookSetting
-from libprs500 import filename_to_utf8
 from libprs500 import iswindows
+from libprs500.ptempfile import PersistentTemporaryFile
+from libprs500.ebooks.lrf import ConversionError, option_parser
+from libprs500.ebooks.lrf.html.convert_from import parse_options as html_parse_options
+from libprs500.ebooks.lrf.html.convert_from import process_file
+from libprs500.ebooks.markdown import markdown

-def parse_options(argv=None, cli=True):
+def parse_options(cli=True):
    """ CLI for txt -> lrf conversions """
-    if not argv:
-        argv = sys.argv[1:]
    parser = option_parser(
        """usage: %prog [options] mybook.txt
        
@ -44,84 +42,78 @@ def parse_options(argv=None, cli=True):
        if cli:
            parser.print_help()
        raise ConversionError, 'no filename specified'
-    if options.title == None:
-        options.title = filename_to_utf8(os.path.splitext(os.path.basename(args[0]))[0])
    return options, args, parser

+def generate_html(txtfile, encoding):
+    '''
+    Convert txtfile to html and return a PersistentTemporaryFile object pointing
+    to the file with the HTML.
+
+    @param txtfile: Path of the text file to convert.
+    @param encoding: Encoding requested by the caller. It is tried first
+                     unless it is C{'cp1252'} or C{'utf8'}.
+    @raise ConversionError: If no candidate encoding can decode C{txtfile}.
+    '''
+    candidates = ['iso-8859-1', 'koi8_r', 'koi8_u', 'utf8']
+    if iswindows:
+        candidates = ['cp1252'] + candidates
+    if encoding not in ['cp1252', 'utf8']:
+        candidates = [encoding] + candidates
+    # NOTE(review): iso-8859-1 assigns a character to every byte value, so the
+    # candidates listed after it look unreachable -- confirm the intended order.
+    txt, enc = None, None
+    for candidate in candidates:
+        stream = codecs.open(txtfile, 'rb', candidate)
+        try:
+            txt = stream.read()
+        except UnicodeDecodeError:
+            pass
+        else:
+            enc = candidate
+        finally:
+            # Close the handle whether or not decoding succeeded
+            stream.close()
+        if enc is not None:
+            break
+    if txt is None:
+        raise ConversionError, 'Could not detect encoding of %s'%(txtfile,)
+    md = markdown.Markdown(txt,
+                           extensions=['footnotes', 'tables', 'toc'],
+                           encoding=enc,
+                           safe_mode=False,
+                           )
+    html = md.toString().decode(enc)
+    # The temporary file is created next to the source text file
+    p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile))
+    p.close()
+    out = codecs.open(p.name, 'wb', enc)
+    try:
+        out.write(html)
+    finally:
+        # Make sure the HTML is flushed to disk before the path is handed on
+        out.close()
+    return p
+        
 def main():
    try:
        options, args, parser = parse_options()
-        src = os.path.abspath(os.path.expanduser(args[0]))
-    except:        
+        txt = os.path.abspath(os.path.expanduser(args[0]))
+        p = generate_html(txt, options.encoding)
+        for i in range(1, len(sys.argv)):
+            if sys.argv[i] == args[0]:
+                sys.argv.remove(sys.argv[i])
+                break            
+        sys.argv.append(p.name)
+        sys.argv.append('--force-page-break-before')
+        sys.argv.append('h2')
+        o_spec = False
+        for arg in sys.argv[1:]:
+            arg = arg.lstrip()
+            if arg.startswith('-o') or arg.startswith('--output'):
+                o_spec = True
+                break
+        ext = '.lrf'
+        for arg in sys.argv[1:]:
+            if arg.strip() == '--lrs':
+                ext = '.lrs'
+                break
+        if not o_spec:
+            sys.argv.append('-o')
+            sys.argv.append(os.path.splitext(os.path.basename(txt))[0]+ext)
+        options, args, parser = html_parse_options(parser=parser)
+        src = args[0]
+        if options.verbose:
+            import warnings
+            warnings.defaultaction = 'error'        
+    except Exception, err:
+        print >> sys.stderr, err
+        import traceback
+        traceback.print_exc()
        sys.exit(1)
-    print 'Output written to ', convert_txt(src, options)
+    process_file(src, options)
        
    
-def convert_txt(path, options):
-    """
-    Convert the text file at C{path} into an lrf file.
-    @param options: Object with the following attributes:
-                    C{author}, C{title}, C{encoding} (the assumed encoding of 
-                    the text in C{path}.)
-    """
-    import codecs
-    header = None
-    if options.header:
-        header = Paragraph()
-        header.append(Bold(options.title))
-        header.append(' by ')
-        header.append(Italic(options.author))
-    title = (options.title, options.title_sort)
-    author = (options.author, options.author_sort)
-    book = Book(options, header=header, title=title, author=author, \
-                publisher=options.publisher,
-                sourceencoding=options.encoding, freetext=options.freetext, \
-                category=options.category, booksetting=BookSetting
-                (dpi=10*options.profile.dpi,
-                 screenheight=options.profile.screen_height, 
-                 screenwidth=options.profile.screen_width))
-    buffer = ''
-    pg = book.create_page()
-    block = book.create_text_block()
-    pg.append(block)
-    book.append(pg)
-    lines = ""
-    try:
-        lines = codecs.open(path, 'rb', options.encoding).readlines()
-    except UnicodeDecodeError:
-            try:
-                lines = codecs.open(path, 'rb', 'cp1252').readlines()
-            except UnicodeDecodeError:
-                try:
-                    lines = codecs.open(path, 'rb', 'iso-8859-1').readlines()
-                except UnicodeDecodeError:
-                    try:
-                        lines = codecs.open(path, 'rb', 'koi8_r').readlines()
-                    except UnicodeDecodeError:
-                        try:
-                            lines = codecs.open(path, 'rb', 'koi8_u').readlines()
-                        except UnicodeDecodeError:
-                            lines = codecs.open(path, 'rb', 'utf8').readlines()
-    for line in lines:
-        line = line.strip()
-        if line:
-            buffer = buffer.rstrip() + ' ' + line
-        else:
-            block.Paragraph(buffer)            
-            buffer = ''
-    basename = os.path.basename(path)
-    oname = options.output
-    if not oname:
-        oname = os.path.splitext(basename)[0]+('.lrs' if options.lrs else '.lrf')
-    oname = os.path.abspath(os.path.expanduser(oname))
-    try: 
-        book.renderLrs(oname) if options.lrs else book.renderLrf(oname)
-    except UnicodeDecodeError:
-        raise ConversionError(path + ' is not encoded in ' + \
-                              options.encoding +'. Specify the '+ \
-                              'correct encoding with the -e option.')
-    return os.path.abspath(oname)
-    

 if __name__ == '__main__':
    main()
--- a/src/libprs500/ebooks/markdown/init.py
+++ b/src/libprs500/ebooks/markdown/init.py
@ -0,0 +1,5 @@
+''' Package defines lightweight markup language for processing of txt files'''
+# Initialize extensions
+from libprs500.ebooks.markdown import mdx_footnotes
+from libprs500.ebooks.markdown import mdx_tables
+from libprs500.ebooks.markdown import mdx_toc
--- a/src/libprs500/ebooks/markdown/markdown.py
+++ b/src/libprs500/ebooks/markdown/markdown.py
--- a/src/libprs500/ebooks/markdown/mdx_footnotes.py
+++ b/src/libprs500/ebooks/markdown/mdx_footnotes.py
@ -0,0 +1,255 @@
+"""
+## To see this file as plain text go to
+## http://freewisdom.org/projects/python-markdown/mdx_footnotes.raw_content
+
+========================= FOOTNOTES =================================
+
+This section adds footnote handling to markdown.  It can be used as
+an example for extending python-markdown with relatively complex
+functionality.  While in this case the extension is included inside
+the module itself, it could just as easily be added from outside the
+module.  Note that all markdown classes above are ignorant about
+footnotes.  All footnote functionality is provided separately and
+then added to the markdown instance at the run time.
+
+Footnote functionality is attached by calling extendMarkdown()
+method of FootnoteExtension.  The method also registers the
+extension to allow its state to be reset by a call to reset()
+method.
+"""
+
+FN_BACKLINK_TEXT = "zz1337820767766393qq"
+
+
+import re, markdown, random
+
+class FootnoteExtension (markdown.Extension):
+    """
+    Markdown extension that adds footnote support.
+
+    Footnote definitions are kept in C{self.footnotes} (id -> text) and the
+    numbers assigned to footnote references in C{self.used_footnotes}
+    (id -> number). L{makeFootnotesDiv} renders the collected footnotes as
+    an ordered list inside a C{<div class="footnote">}.
+    """
+
+    DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
+    SHORT_USE_RE = re.compile(r'\[\^([^\]]*)\]', re.M) # [^a]
+
+    def __init__ (self, configs) :
+        """
+        @param configs: Iterable of C{(key, value)} pairs overriding entries
+                        of C{self.config}. C{None} is treated as empty.
+        """
+
+        self.config = {'PLACE_MARKER' :
+                       ["///Footnotes Go Here///",
+                        "The text string that marks where the footnotes go"]}
+
+        # makeExtension() defaults configs to None, which cannot be iterated
+        for key, value in configs or [] :
+            self.config[key][0] = value
+            
+        self.reset()
+
+    def extendMarkdown(self, md, md_globals) :
+        """
+        Hook the footnote preprocessor, inline pattern and postprocessors
+        into the markdown instance C{md}.
+        """
+
+        self.md = md
+
+        # Stateless extensions do not need to be registered
+        md.registerExtension(self)
+
+        # Insert a preprocessor before ReferencePreprocessor
+        index = md.preprocessors.index(md_globals['REFERENCE_PREPROCESSOR'])
+        preprocessor = FootnotePreprocessor(self)
+        preprocessor.md = md
+        md.preprocessors.insert(index, preprocessor)
+
+        # Insert an inline pattern before ImageReferencePattern
+        FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
+        index = md.inlinePatterns.index(md_globals['IMAGE_REFERENCE_PATTERN'])
+        md.inlinePatterns.insert(index, FootnotePattern(FOOTNOTE_RE, self))
+
+        # Insert a post-processor that would actually add the footnote div
+        postprocessor = FootnotePostprocessor(self)
+        postprocessor.extension = self
+
+        md.postprocessors.append(postprocessor)
+        
+        textPostprocessor = FootnoteTextPostprocessor(self)
+
+        md.textPostprocessors.append(textPostprocessor)
+
+
+    def reset(self) :
+        """Forget all footnotes and pick a new random id suffix."""
+        # May be called by Markdown if a state reset is desired
+
+        self.footnote_suffix = "-" + str(int(random.random()*1000000000))
+        self.used_footnotes={}
+        self.footnotes = {}
+
+    def findFootnotesPlaceholder(self, doc) :
+        """
+        @returns: the first text node containing the C{PLACE_MARKER} string,
+                  or C{None} if there is no such node
+        """
+        def findFootnotePlaceholderFn(node=None, indent=0):
+            if node.type == 'text':
+                if node.value.find(self.getConfig("PLACE_MARKER")) > -1 :
+                    return True
+
+        fn_div_list = doc.find(findFootnotePlaceholderFn)
+        if fn_div_list :
+            return fn_div_list[0]
+
+
+    def setFootnote(self, id, text) :
+        """Store C{text} as the body of the footnote named C{id}."""
+        self.footnotes[id] = text
+
+    def makeFootnoteId(self, num) :
+        """@returns: the element id used for footnote number C{num}"""
+        return 'fn%d%s' % (num, self.footnote_suffix)
+
+    def makeFootnoteRefId(self, num) :
+        """@returns: the element id used for the reference to footnote C{num}"""
+        return 'fnr%d%s' % (num, self.footnote_suffix)
+
+    def makeFootnotesDiv (self, doc) :
+        """Creates the div with class='footnote' and populates it with
+           the text of the footnotes.
+
+           @returns: the footnote div as a dom element, or C{None} when no
+                     footnotes were defined """
+
+        if not self.footnotes.keys() :
+            return None
+
+        div = doc.createElement("div")
+        div.setAttribute('class', 'footnote')
+        hr = doc.createElement("hr")
+        div.appendChild(hr)
+        ol = doc.createElement("ol")
+        div.appendChild(ol)
+
+        # Order the footnotes by the number assigned to their reference.
+        # NOTE(review): a definition that is never referenced has no entry in
+        # used_footnotes and raises KeyError here -- confirm this is intended.
+        footnotes = [(self.used_footnotes[id], id)
+                     for id in self.footnotes.keys()]
+        footnotes.sort()
+
+        for i, id in footnotes :
+            li = doc.createElement('li')
+            li.setAttribute('id', self.makeFootnoteId(i))
+
+            self.md._processSection(li, self.footnotes[id].split("\n"))
+
+            backlink = doc.createElement('a')
+            backlink.setAttribute('href', '#' + self.makeFootnoteRefId(i))
+            backlink.setAttribute('class', 'footnoteBackLink')
+            # The title names this footnote's own number, not always 1
+            backlink.setAttribute('title',
+                                  'Jump back to footnote %d in the text' % i)
+            backlink.appendChild(doc.createTextNode(FN_BACKLINK_TEXT))
+
+            if li.childNodes :
+                # Append the back-link to the last child, unless that child
+                # is a text node, in which case it goes on the <li> itself
+                node = li.childNodes[-1]
+                if node.type == "text" :
+                    node = li
+                node.appendChild(backlink)
+
+            ol.appendChild(li)
+
+        return div
+
+
+class FootnotePreprocessor :
+    """
+    Preprocessor that removes footnote definitions from the text, stores
+    them on the footnote extension and numbers the footnote references.
+    """
+
+    def __init__ (self, footnotes) :
+        """@param footnotes: the L{FootnoteExtension} that stores the state"""
+        self.footnotes = footnotes
+
+    def run(self, lines) :
+        """
+        @param lines: a list of lines of text
+        @returns: the list of lines with footnote definitions removed
+        """
+
+        self.blockGuru = markdown.BlockGuru()
+        lines = self._handleFootnoteDefinitions (lines)
+
+        # Make a hash of all footnote marks in the text so that we
+        # know in what order they are supposed to appear.  (This
+        # function call doesn't really substitute anything - it's just
+        # a way to get a callback for each occurrence.)
+
+        text = "\n".join(lines)
+        self.footnotes.SHORT_USE_RE.sub(self.recordFootnoteUse, text)
+
+        return text.split("\n")
+
+
+    def recordFootnoteUse(self, match) :
+        """
+        Assign the next free number to the footnote id found in C{match}.
+        An id that was already seen keeps the number of its first use.
+        """
+
+        id = match.group(1).strip()
+        used = self.footnotes.used_footnotes
+        # Renumbering on a repeated reference would leave gaps in the
+        # numbering and reorder the rendered footnotes
+        if id not in used :
+            used[id] = len(used) + 1
+
+
+    def _handleFootnoteDefinitions(self, lines) :
+        """Recursively finds all footnote definitions in the lines.
+
+            @param lines: a list of lines of text
+            @returns: a list of lines in which each footnote definition
+                      has been replaced by an empty line """
+
+        i, id, footnote = self._findFootnoteDefinition(lines)
+
+        if id :
+
+            plain = lines[:i]
+
+            # presumably detectTabbed splits off the indented continuation
+            # lines of the definition -- verify against markdown.BlockGuru
+            detabbed, theRest = self.blockGuru.detectTabbed(lines[i+1:])
+
+            self.footnotes.setFootnote(id,
+                                       footnote + "\n"
+                                       + "\n".join(detabbed))
+
+            more_plain = self._handleFootnoteDefinitions(theRest)
+            return plain + [""] + more_plain
+
+        else :
+            return lines
+
+    def _findFootnoteDefinition(self, lines) :
+        """Finds the first line of a footnote definition.
+
+            @param lines: a list of lines of text
+            @returns: a tuple C{(index, id, text)} for the first definition
+                      found, or C{(len(lines), None, None)} if there is none """
+
+        counter = 0
+        for line in lines :
+            m = self.footnotes.DEF_RE.match(line)
+            if m :
+                return counter, m.group(2), m.group(3)
+            counter += 1
+        return counter, None, None
+
+
+class FootnotePattern (markdown.Pattern) :
+    """Inline pattern that renders a footnote reference as a linked <sup>."""
+
+    def __init__ (self, pattern, footnotes) :
+        """
+        @param pattern: regular expression source handed to markdown.Pattern
+        @param footnotes: the footnote extension holding the numbering
+        """
+
+        markdown.Pattern.__init__(self, pattern)
+        self.footnotes = footnotes
+
+    def handleMatch(self, m, doc) :
+        """
+        @returns: a C{<sup>} element wrapping a link to the footnote whose
+                  id is C{m.group(2)}
+        """
+        marker = doc.createElement('sup')
+        link = doc.createElement('a')
+        marker.appendChild(link)
+        extension = self.footnotes
+        # Number assigned to this footnote id when the text was preprocessed
+        number = extension.used_footnotes[m.group(2)]
+        ref_id = extension.makeFootnoteRefId(number)
+        marker.setAttribute('id', ref_id)
+        target = '#' + extension.makeFootnoteId(number)
+        link.setAttribute('href', target)
+        label = doc.createTextNode(str(number))
+        link.appendChild(label)
+        return marker
+
+class FootnotePostprocessor (markdown.Postprocessor):
+    """Postprocessor that inserts the rendered footnotes into the document."""
+
+    def __init__ (self, footnotes) :
+        """@param footnotes: the footnote extension holding the footnotes"""
+        self.footnotes = footnotes
+        # run() reads self.extension; set it here so the object works even
+        # when the caller does not assign the attribute afterwards
+        self.extension = footnotes
+
+    def run(self, doc) :
+        """
+        Replace the footnote placeholder in C{doc} with the footnotes div,
+        or append the div to the document element when there is no
+        placeholder. Does nothing when no footnotes div is produced.
+        """
+        footnotesDiv = self.footnotes.makeFootnotesDiv(doc)
+        if footnotesDiv :
+            fnPlaceholder = self.extension.findFootnotesPlaceholder(doc)
+            if fnPlaceholder :
+                fnPlaceholder.parent.replaceChild(fnPlaceholder, footnotesDiv)
+            else :
+                doc.documentElement.appendChild(footnotesDiv)
+
+class FootnoteTextPostprocessor (markdown.Postprocessor):
+    """Text postprocessor that swaps the back-link marker for an entity."""
+
+    def __init__ (self, footnotes) :
+        """@param footnotes: the footnote extension (stored, not used here)"""
+        self.footnotes = footnotes
+
+    def run(self, text) :
+        """@returns: C{text} with every FN_BACKLINK_TEXT replaced by &#8617;"""
+        pieces = text.split(FN_BACKLINK_TEXT)
+        return "&#8617;".join(pieces)
+
+def makeExtension(configs=None) :
+    """@returns: a new FootnoteExtension built from C{configs}"""
+    extension = FootnoteExtension(configs=configs)
+    return extension
+
--- a/src/libprs500/ebooks/markdown/mdx_tables.py
+++ b/src/libprs500/ebooks/markdown/mdx_tables.py
@ -0,0 +1,65 @@
+#!/usr/bin/env python
+
+"""
+Table extension for Python-Markdown
+"""
+
+import markdown
+
+
+class TablePattern(markdown.Pattern) :
+	"""Inline pattern that turns one pipe-delimited line into a <tr> element."""
+
+	def __init__ (self, md):
+		"""@param md: markdown instance used to process the cell contents"""
+		markdown.Pattern.__init__(self, r'^\|([^\n]*)\|(\n|$)')
+		self.md = md
+
+	def handleMatch(self, m, doc) :
+		"""
+		Build a table row from the matched line.
+
+		NOTE(review): relies on m.group(2) being the text between the outer
+		pipes, which presumably comes from how markdown.Pattern wraps the
+		regular expression -- confirm.
+
+		@returns: the <tr> element
+		"""
+		row = doc.createElement('tr')
+		row.appendChild(doc.createTextNode('\n'))
+		# every chunk between two pipes is one cell
+		for chunk in m.group(2).split('|'):
+			starred = chunk.startswith('*') and chunk.endswith('*')
+			if len(chunk) >= 2 and starred:
+				# a chunk wrapped in asterisks becomes a header cell
+				cell = doc.createElement('th')
+				chunk = chunk[1:-1]
+			else:
+				cell = doc.createElement('td')
+			# run the inline patterns over the cell text
+			for piece in self.md._handleInline(chunk):
+				if type(piece) != unicode:
+					cell.appendChild(piece)
+				else:
+					cell.appendChild(doc.createTextNode(piece))
+			row.appendChild(cell)
+			# one cell per output line keeps the generated lines short
+			row.appendChild(doc.createTextNode('\n'))
+		return row
+
+
+class TablePostprocessor:
+	"""Postprocessor that renames <p> elements holding table rows to <table>."""
+
+	def run(self, doc):
+		"""
+		Rename every <p> whose first child that is not blank text is a <tr>
+		to <table>.
+		"""
+		def is_paragraph(candidate):
+			return candidate.type == 'element' and candidate.nodeName == 'p'
+		for paragraph in doc.find(is_paragraph):
+			for child in paragraph.childNodes:
+				blank_text = child.type == 'text' and child.value.strip() == ''
+				if not blank_text:
+					# only the first significant child decides
+					if child.type == 'element' and child.nodeName == 'tr':
+						paragraph.nodeName = 'table'
+					break
+
+
+class TableExtension(markdown.Extension):
+	"""Markdown extension that registers the table pattern and postprocessor."""
+
+	def extendMarkdown(self, md, md_globals):
+		"""Put the table pattern first in line and append the postprocessor."""
+		row_pattern = TablePattern(md)
+		md.inlinePatterns.insert(0, row_pattern)
+		fixer = TablePostprocessor()
+		md.postprocessors.append(fixer)
+
+
+def makeExtension(configs):
+	"""@returns: a new TableExtension built from C{configs}"""
+	extension = TableExtension(configs)
+	return extension
+
+
--- a/src/libprs500/ebooks/markdown/mdx_toc.py
+++ b/src/libprs500/ebooks/markdown/mdx_toc.py
@ -0,0 +1,165 @@
+## To access this file as plain text go to
+## http://freewisdom.org/projects/python-markdown/mdx_toc.raw_content
+
+"""
+Chris Clark - clach04 -at- sf.net
+
+My markdown extensions for adding:
+    Table of Contents (aka toc)
+"""
+
+import os
+import sys
+import re
+import markdown
+
+DEFAULT_TITLE = None
+
+def extract_alphanumeric(in_str=None):
+    """Title-case in_str and return only its alphanumeric characters,
+    joined into one string.
+    """
+    kept = [char for char in in_str.title() if char.isalnum()]
+    return ''.join(kept)
+
+class TitlePostprocessor (markdown.Postprocessor):
+    """Postprocessor that inserts a title element at the top of the document.
+
+    NOTE(review): calls createTitle() on the extension, but no createTitle
+    is defined in the part of this file that is visible -- confirm that it
+    exists before using this class.
+    """
+
+    def __init__ (self, extension) :
+        """@param extension: object providing createTitle(doc)"""
+        self.extension = extension
+
+    def run(self, doc) :
+        """Insert the created title as first child of the document element."""
+        title = self.extension.createTitle(doc)
+        if not title :
+            return
+        doc.documentElement.insertChild(0, title)
+
+
+class TocExtension (markdown.Extension):
+    """Markdown extension: generate a Table Of Contents (aka toc)
+    toc is returned in a div tag with class='toc'
+    toc is either:
+        appended to end of document
+      OR 
+        replaces first string occurrence of "///Table of Contents///"
+    """
+
+    def __init__ (self) :
+        #maybe add these as parameters to the class init?
+        # Text marking where the toc should be placed
+        self.TOC_INCLUDE_MARKER = "///Table of Contents///"
+        # Heading text of the toc; a false value suppresses the heading
+        self.TOC_TITLE = "Table Of Contents"
+        # Only headers of this level get an entry in the toc
+        self.auto_toc_heading_type=2
+        # Header level used for the toc's own heading
+        self.toc_heading_type=3
+
+
+    def extendMarkdown(self, md, md_globals) :
+        """Append the toc postprocessor to the markdown instance C{md}."""
+        # Just insert in the end
+        md.postprocessors.append(TocPostprocessor(self))
+        # Stateless extensions do not need to be registered, so we don't
+        # register.
+
+    def findTocPlaceholder(self, doc) :
+        """
+        @returns: the first text node containing C{TOC_INCLUDE_MARKER},
+                  or C{None} if there is no such node
+        """
+        def findTocPlaceholderFn(node=None, indent=0):
+            if node.type == 'text':
+                if node.value.find(self.TOC_INCLUDE_MARKER) > -1 :
+                    return True
+
+        toc_div_list = doc.find(findTocPlaceholderFn)
+        if toc_div_list :
+            return toc_div_list[0]
+
+
+    def createTocDiv(self, doc) :
+        """
+           Creates Table Of Contents based on headers. An anchor is added
+           to every header whose first child is non-blank text; headers of
+           level C{auto_toc_heading_type} get an entry in the toc.
+
+           @returns: toc as a single dom element in a <div> tag with
+                     class='toc', or C{None} if no usable header was found
+        """
+
+        # Find headers
+        headers_compiled_re = re.compile("h[123456]", re.IGNORECASE)
+        def findHeadersFn(element=None):
+            if element.type=='element':
+                if headers_compiled_re.match(element.nodeName):
+                    return True
+        
+        headers_doc_list = doc.find(findHeadersFn)
+        
+        # Insert anchor tags into dom
+        generated_anchor_id=0
+        headers_list=[]
+        for element in headers_doc_list:
+            children = element.childNodes
+            # The title is taken from the first child's text. Skip headers
+            # that are empty or start with an inline element instead of
+            # failing on them.
+            if not children or children[0].type != 'text':
+                continue
+            heading_title = children[0].value
+            if heading_title.strip() !="":
+                heading_type = int(element.nodeName[-1:])
+                
+                html_anchor_name= (extract_alphanumeric(heading_title)
+                                   +'__MD_autoTOC_%d' % (generated_anchor_id))
+                
+                # insert anchor tag inside header tags
+                html_anchor = doc.createElement("a")
+                html_anchor.setAttribute('name', html_anchor_name)
+                element.appendChild(html_anchor)
+                
+                headers_list.append( (heading_type, heading_title,
+                                      html_anchor_name) )
+                generated_anchor_id = generated_anchor_id + 1
+        
+        # create dom for toc
+        if headers_list != []:
+            # Create list
+            toc_doc_list = doc.createElement("ul")
+            for (heading_type, heading_title, html_anchor_name) in headers_list:
+                if heading_type == self.auto_toc_heading_type:
+                    toc_doc_entry = doc.createElement("li")
+                    toc_doc_link = doc.createElement("a")
+                    toc_doc_link.setAttribute('href', '#'+html_anchor_name)
+                    toc_doc_text = doc.createTextNode(heading_title)
+                    toc_doc_link.appendChild(toc_doc_text)
+                    toc_doc_entry.appendChild(toc_doc_link)
+                    toc_doc_list.appendChild(toc_doc_entry)
+                    
+            
+            # Put list into div            
+            div = doc.createElement("div")
+            div.setAttribute('class', 'toc')
+            if self.TOC_TITLE:
+                toc_header = doc.createElement("h%d"%(self.toc_heading_type) )
+                toc_header_text = doc.createTextNode(self.TOC_TITLE)
+                toc_header.appendChild(toc_header_text)
+                div.appendChild(toc_header)
+            div.appendChild(toc_doc_list)
+
+            return div
+
+
+class TocPostprocessor (markdown.Postprocessor):
+    """Postprocessor that places the generated table of contents."""
+
+    def __init__ (self, toc) :
+        """@param toc: the toc extension that builds the table of contents"""
+        self.toc = toc
+
+    def run(self, doc):
+        """
+        Put the toc div in place of the placeholder text node, or at the
+        end of the document element when there is no placeholder.
+        """
+        placeholder = self.toc.findTocPlaceholder(doc)
+        
+        generated = self.toc.createTocDiv(doc)
+        if not generated:
+            return
+        if placeholder :
+            # Swap the "magic" marker for the toc
+            placeholder.parent.replaceChild(placeholder, generated)
+            return
+        # No marker: the toc goes at the end of the DOM, where CSS can
+        # be used to position the div
+        doc.documentElement.appendChild(generated)
+
+
+def makeExtension(configs=None) :
+    """@returns: a new TocExtension; C{configs} is accepted but not used"""
+    extension = TocExtension()
+    return extension
--- a/src/libprs500/ptempfile.py
+++ b/src/libprs500/ptempfile.py
@ -47,7 +47,7 @@ class _TemporaryFileWrapper(object):
            os.remove(self.name)
    
    
-def PersistentTemporaryFile(suffix="", prefix=""):
+def PersistentTemporaryFile(suffix="", prefix="", dir=None):
    """ 
    Return a temporary file that is available even after being closed on
    all platforms. It is automatically deleted when this object is deleted.
@ -55,6 +55,7 @@ def PersistentTemporaryFile(suffix="", prefix=""):
    """
    if prefix == None: 
        prefix = ""
-    fd, name = tempfile.mkstemp(suffix, "libprs500_"+ __version__+"_" + prefix)
+    fd, name = tempfile.mkstemp(suffix, "libprs500_"+ __version__+"_" + prefix,
+                                dir=dir)
    _file = os.fdopen(fd, "wb")
    return _TemporaryFileWrapper(_file, name)    
--- a/upload.py
+++ b/upload.py
@ -9,6 +9,7 @@ PREFIX = "/var/www/vhosts/kovidgoyal.net/subdomains/libprs500"
 DOWNLOADS = PREFIX+"/httpdocs/downloads"
 DOCS = PREFIX+"/httpdocs/apidocs"
 HTML2LRF = "src/libprs500/ebooks/lrf/html/demo"
+TXT2LRF  = "src/libprs500/ebooks/lrf/txt/demo"
 check_call = partial(_check_call, shell=True)
 h = Host(hostType=VIX_SERVICEPROVIDER_VMWARE_WORKSTATION)

@ -19,7 +20,7 @@ def build_windows():
    
    
    
-    vm = h.openVM('/mnt/extra/vmware/Windows Vista/Windows Vista.vmx')
+    vm = h.openVM('/mnt/backup/vmware/Windows Vista/Windows Vista.vmx')
    vm.powerOn() 
    if not vm.waitForToolsInGuest():
        print >>sys.stderr, 'Windows is not booting up'
@ -43,7 +44,7 @@ def build_osx():
    if os.path.exists('dist/dmgdone'):
        os.unlink('dist/dmgdone')
    
-    vm = h.openVM('/mnt/extra/vmware/Mac OSX/Mac OSX.vmx')
+    vm = h.openVM('/mnt/backup/vmware/Mac OSX/Mac OSX.vmx')
    vm.powerOn()
    c = 25 * 60
    print 'Waiting (minutes):',
@ -69,6 +70,8 @@ def upload_demo():
    f.close()
    check_call('''html2lrf --title='Demonstration of html2lrf' --author='Kovid Goyal' --header --output=/tmp/html2lrf.lrf %s/demo.html'''%(HTML2LRF,))
    check_call('''scp /tmp/html2lrf.lrf castalia:%s/'''%(DOWNLOADS,))
+    check_call('''txt2lrf -t 'Demonstration of txt2lrf' -a 'Kovid Goyal' --header -o /tmp/txt2lrf.lrf %s/demo.txt'''%(TXT2LRF,) )
+    check_call('''scp /tmp/txt2lrf.lrf castalia:%s/'''%(DOWNLOADS,))

 def upload_installers(exe, dmg):
    check_call('''ssh castalia rm -f %s/libprs500\*.exe'''%(DOWNLOADS,))