Implement support for markdown in txt2lrf. Fix handling of <pre>, <th> elements in html2lrf

2025-07-07 10:14:46 -04:00 · 2007-06-19 15:13:52 +00:00 · 2007-06-19 15:13:52 +00:00 · 9178ddf18e
commit 9178ddf18e
parent a082131823
13 changed files with 2277 additions and 105 deletions
--- a/setup.py
+++ b/setup.py
@ -26,6 +26,7 @@ entry_points = {
                             'rtf-meta = libprs500.ebooks.metadata.rtf:main', \
                             'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main', \
                             'html2lrf = libprs500.ebooks.lrf.html.convert_from:main',\
                             'markdown = libprs500.ebooks.markdown.markdown:main',\
                           ], 
        'gui_scripts'    : [ APPNAME+' = libprs500.gui.main:main']
      }
--- a/src/libprs500/init.py
+++ b/src/libprs500/init.py
@ -13,7 +13,7 @@
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 ''' E-book management software'''
-__version__   = "0.3.52"
+__version__   = "0.3.53"
 __docformat__ = "epytext"
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"
 __appname__   = 'libprs500'
--- a/src/libprs500/ebooks/lrf/init.py
+++ b/src/libprs500/ebooks/lrf/init.py
@ -73,6 +73,16 @@ def option_parser(usage):
    profiles=['prs500'] 
    parser.add_option('-o', '--output', action='store', default=None, \
                      help='Output file name. Default is derived from input filename')
    laf = parser.add_option_group('LOOK AND FEEL')
    laf.add_option('--cover', action='store', dest='cover', default=None, \
                      help='Path to file containing image to be used as cover')
    laf.add_option('--font-delta', action='store', type='float', default=0., \
                      help="""Increase the font size by 2 * FONT_DELTA pts and """
                      '''the line spacing by FONT_DELTA pts. FONT_DELTA can be a fraction.'''
                      """If FONT_DELTA is negative, the font size is decreased.""",
                      dest='font_delta')
    laf.add_option('--disable-autorotation', action='store_true', default=False, 
                   help='Disable autorotation of images.', dest='disable_autorotation')
    page = parser.add_option_group('PAGE OPTIONS')
    page.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice',
                      choices=profiles, action='callback', callback=profile_from_string,
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@ -221,6 +221,7 @@ class HTMLConverter(object):
                 chapter_regex=re.compile('chapter|book|appendix', re.IGNORECASE),
                 link_exclude=re.compile('$'), 
                 page_break=re.compile('h[12]', re.IGNORECASE),
                 force_page_break=re.compile('$', re.IGNORECASE),
                 profile=PRS500_PROFILE,
                 disable_autorotation=False):
        '''
@ -273,7 +274,8 @@ class HTMLConverter(object):
            small  = {'font-size'   :'small'},
            pre    = {'font-family' :'monospace' },
            tt     = {'font-family' :'monospace'},
-            center = {'text-align'  : 'center'}
+            center = {'text-align'  : 'center'},
            th     = {'font-size':'large', 'font-weight':'bold'},
            )        
        self.profile     = profile #: Defines the geometry of the display device
        self.chapter_detection = chapter_detection #: Flag to toggle chapter detection
@ -287,7 +289,8 @@ class HTMLConverter(object):
        self.blockquote_style = book.create_block_style(sidemargin=60, 
                                                        topskip=20, footskip=20)
        self.unindented_style = book.create_text_style(parindent=0)
-        self.page_break       = page_break #: Regex controlling forced page-break behavior
+        self.page_break       = page_break #: Regex controlling page-break behavior
        self.force_page_break = force_page_break #: Regex controlling forced page-break behavior
        self.text_styles      = []#: Keep track of already used textstyles
        self.block_styles     = []#: Keep track of already used blockstyles
        self.images  = {}         #: Images referenced in the HTML document
@ -559,6 +562,7 @@ class HTMLConverter(object):
                                     chapter_regex=self.chapter_regex,
                                     link_exclude=self.link_exclude,
                                     page_break=self.page_break,
                                     force_page_break=self.force_page_break,
                                     disable_autorotation=self.disable_autorotation)
                        HTMLConverter.processed_files[path] = self.files[path]
                    except Exception:
@ -829,6 +833,9 @@ class HTMLConverter(object):
           tag_css['page-break-after'].lower() != 'avoid':
            end_page = True
            tag_css.pop('page-break-after')
        if self.force_page_break.match(tagname):
            self.end_page()
            self.page_break_found = True
        if not self.page_break_found and self.page_break.match(tagname):
            if len(self.current_page.contents) > 3:
                self.end_page()
@ -956,6 +963,7 @@ class HTMLConverter(object):
                except ConversionError:
                    pass
            self.end_current_block()
            self.current_block = self.book.create_text_block()
        elif tagname in ['ul', 'ol']:
            self.in_ol = 1 if tagname == 'ol' else 0
            self.end_current_block()
@ -1138,13 +1146,15 @@ def process_file(path, options):
             re.compile('$')
        pb = re.compile(options.page_break, re.IGNORECASE) if options.page_break else \
             re.compile('$')
        fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \
             re.compile('$')
        conv = HTMLConverter(book, path, profile=options.profile,
                             font_delta=options.font_delta, 
                             cover=cpath, max_link_levels=options.link_levels,
                             verbose=options.verbose, baen=options.baen, 
                             chapter_detection=options.chapter_detection,
                             chapter_regex=re.compile(options.chapter_regex, re.IGNORECASE),
-                             link_exclude=re.compile(le), page_break=pb,
+                             link_exclude=re.compile(le), page_break=pb, force_page_break=fpb,
                             disable_autorotation=options.disable_autorotation)
        conv.process_links()
        oname = options.output
@ -1220,23 +1230,14 @@ def try_opf(path, options):
-def parse_options(argv=None, cli=True):
+def parse_options(argv=None, cli=True, parser=None):
    """ CLI for html -> lrf conversions """
    if not argv:
        argv = sys.argv[1:]
    if not parser:
        parser = option_parser("""usage: %prog [options] mybook.[html|rar|zip]
         %prog converts mybook.html to mybook.lrf""")
    laf = parser.add_option_group('LOOK AND FEEL')
    laf.add_option('--cover', action='store', dest='cover', default=None, \
                      help='Path to file containing image to be used as cover')
    laf.add_option('--font-delta', action='store', type='float', default=0., \
                      help="""Increase the font size by 2 * FONT_DELTA pts and """
                      '''the line spacing by FONT_DELTA pts. FONT_DELTA can be a fraction.'''
                      """If FONT_DELTA is negative, the font size is decreased.""",
                      dest='font_delta')
    laf.add_option('--disable-autorotation', action='store_true', default=False, 
                   help='Disable autorotation of images.', dest='disable_autorotation')
    link = parser.add_option_group('LINK PROCESSING OPTIONS')
    link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
                      dest='link_levels',
@ -1265,6 +1266,8 @@ def parse_options(argv=None, cli=True):
                      '''there are no really long pages as this degrades the page '''
                      '''turn performance of the LRF. Thus this option is ignored '''
                      '''if the current page has only a few elements.''')
    chapter.add_option('--force-page-break-before', dest='force_page_break',
                       default='$', help='Like --page-break-before, but page breaks are forced.')
    prepro = parser.add_option_group('PREPROCESSING OPTIONS')
    prepro.add_option('--baen', action='store_true', default=False, dest='baen',
                      help='''Preprocess Baen HTML files to improve generated LRF.''')
@ -1285,7 +1288,8 @@ def main():
        if options.verbose:
            import warnings
            warnings.defaultaction = 'error'
-    except:        
+    except Exception, err:
        print >> sys.stderr, err
        sys.exit(1)    
    process_file(src, options)
--- a/src/libprs500/ebooks/lrf/html/table.py
+++ b/src/libprs500/ebooks/lrf/html/table.py
@ -12,7 +12,7 @@
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-import math, sys
+import math, sys, re
 from libprs500.ebooks.lrf.fonts import get_font
 from libprs500.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
@ -215,7 +215,7 @@ class Row(object):
    def __init__(self, conv, row, css, colpad):
        self.cells = []
        self.colpad = colpad
-        cells = row.findAll('td')
+        cells = row.findAll(re.compile('td|th'))
        for cell in cells:
            ccss = conv.tag_css(cell, css)
            self.cells.append(Cell(conv, cell, ccss))        
--- a/src/libprs500/ebooks/lrf/txt/convert_from.py
+++ b/src/libprs500/ebooks/lrf/txt/convert_from.py
@ -15,19 +15,17 @@
 """
 Convert .txt files to .lrf
 """
-import os, sys
+import os, sys, codecs
 from libprs500.ebooks import BeautifulSoup
 from libprs500.ebooks.lrf import ConversionError, option_parser
 from libprs500.ebooks.lrf import Book
 from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, Italic, Bold, BookSetting
 from libprs500 import filename_to_utf8
 from libprs500 import iswindows
 from libprs500.ptempfile import PersistentTemporaryFile
 from libprs500.ebooks.lrf import ConversionError, option_parser
 from libprs500.ebooks.lrf.html.convert_from import parse_options as html_parse_options
 from libprs500.ebooks.lrf.html.convert_from import process_file
 from libprs500.ebooks.markdown import markdown
-def parse_options(argv=None, cli=True):
+def parse_options(cli=True):
    """ CLI for txt -> lrf conversions """
    if not argv:
        argv = sys.argv[1:]
    parser = option_parser(
        """usage: %prog [options] mybook.txt
@ -44,84 +42,78 @@ def parse_options(argv=None, cli=True):
        if cli:
            parser.print_help()
        raise ConversionError, 'no filename specified'
    if options.title == None:
        options.title = filename_to_utf8(os.path.splitext(os.path.basename(args[0]))[0])
    return options, args, parser
 def generate_html(txtfile, encoding):
    '''
    Convert txtfile to html and return a PersistentTemporaryFile object pointing
    to the file with the HTML.
    @param txtfile: Path to the text (markdown) file to convert.
    @param encoding: Encoding to try first. It is only added to the front of
                     the candidate list when it is not C{'cp1252'} or C{'utf8'}.
    @return: A closed PersistentTemporaryFile, created in the directory of
             C{txtfile}, holding the generated HTML encoded with the
             encoding that successfully decoded C{txtfile}.
    @raise ConversionError: If no candidate encoding can decode C{txtfile}.
    '''
    candidates = ['iso-8859-1', 'koi8_r', 'koi8_u', 'utf8']
    if iswindows:
        candidates = ['cp1252'] + candidates
    if encoding not in ['cp1252', 'utf8']:
        candidates = [encoding] + candidates
    txt, enc = None, None
    # NOTE(review): iso-8859-1 can decode any byte sequence, so the candidates
    # listed after it look unreachable -- confirm the intended order.
    for candidate in candidates:
        src = codecs.open(txtfile, 'rb', candidate)
        try:
            txt = src.read()
        except UnicodeDecodeError:
            continue
        finally:
            # Always release the handle, whether or not decoding succeeded
            src.close()
        enc = candidate
        break
    if txt is None:
        raise ConversionError('Could not detect encoding of %s'%(txtfile,))
    md = markdown.Markdown(txt,
                           extensions=['footnotes', 'tables', 'toc'],
                           encoding=enc,
                           safe_mode=False,
                           )
    html = md.toString().decode(enc)
    p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile))
    p.close()
    # Close explicitly so the HTML is flushed to disk before the caller
    # hands p.name to the html -> lrf converter.
    out = codecs.open(p.name, 'wb', enc)
    try:
        out.write(html)
    finally:
        out.close()
    return p
 def main():
    try:
        options, args, parser = parse_options()
-        src = os.path.abspath(os.path.expanduser(args[0]))
+        txt = os.path.abspath(os.path.expanduser(args[0]))
-    except:        
+        p = generate_html(txt, options.encoding)
        for i in range(1, len(sys.argv)):
            if sys.argv[i] == args[0]:
                sys.argv.remove(sys.argv[i])
                break            
        sys.argv.append(p.name)
        sys.argv.append('--force-page-break-before')
        sys.argv.append('h2')
        o_spec = False
        for arg in sys.argv[1:]:
            arg = arg.lstrip()
            if arg.startswith('-o') or arg.startswith('--output'):
                o_spec = True
                break
        ext = '.lrf'
        for arg in sys.argv[1:]:
            if arg.strip() == '--lrs':
                ext = '.lrs'
                break
        if not o_spec:
            sys.argv.append('-o')
            sys.argv.append(os.path.splitext(os.path.basename(txt))[0]+ext)
        options, args, parser = html_parse_options(parser=parser)
        src = args[0]
        if options.verbose:
            import warnings
            warnings.defaultaction = 'error'        
    except Exception, err:
        print >> sys.stderr, err
        import traceback
        traceback.print_exc()
        sys.exit(1)
-    print 'Output written to ', convert_txt(src, options)
+    process_file(src, options)
 def convert_txt(path, options):
    """
    Convert the text file at C{path} into an lrf file.
    Consecutive non-blank lines are joined into one paragraph; a blank line
    ends the current paragraph.
    @param path: Path to the text file to convert.
    @param options: Object with the following attributes:
                    C{author}, C{title}, C{encoding} (the assumed encoding of 
                    the text in C{path}.)
                    The code below also reads C{header}, C{title_sort},
                    C{author_sort}, C{publisher}, C{freetext}, C{category},
                    C{profile}, C{output} and C{lrs}.
    @return: Absolute path of the file that was written.
    @raise ConversionError: If rendering fails with a UnicodeDecodeError.
    """
    import codecs
    header = None
    if options.header:
        header = Paragraph()
        header.append(Bold(options.title))
        header.append(' by ')
        header.append(Italic(options.author))
    title = (options.title, options.title_sort)
    author = (options.author, options.author_sort)
    book = Book(options, header=header, title=title, author=author, \
                publisher=options.publisher,
                sourceencoding=options.encoding, freetext=options.freetext, \
                category=options.category, booksetting=BookSetting
                (dpi=10*options.profile.dpi,
                 screenheight=options.profile.screen_height, 
                 screenwidth=options.profile.screen_width))
    pg = book.create_page()
    block = book.create_text_block()
    pg.append(block)
    book.append(pg)
    # Try the requested encoding first, then the same fallbacks (in the same
    # order) as before. Only the failure of the very last candidate propagates.
    candidates = [options.encoding, 'cp1252', 'iso-8859-1',
                  'koi8_r', 'koi8_u', 'utf8']
    last = len(candidates) - 1
    lines = ""
    for i, enc in enumerate(candidates):
        src = codecs.open(path, 'rb', enc)
        try:
            lines = src.readlines()
        except UnicodeDecodeError:
            if i == last:
                raise
            continue
        finally:
            src.close()
        break
    para = ''
    for line in lines:
        line = line.strip()
        if line:
            para = para.rstrip() + ' ' + line
        else:
            block.Paragraph(para)            
            para = ''
    # A file that does not end with a blank line still has text pending here;
    # emit it so the final paragraph is not silently dropped.
    if para:
        block.Paragraph(para)
    basename = os.path.basename(path)
    oname = options.output
    if not oname:
        oname = os.path.splitext(basename)[0]+('.lrs' if options.lrs else '.lrf')
    oname = os.path.abspath(os.path.expanduser(oname))
    try: 
        if options.lrs:
            book.renderLrs(oname)
        else:
            book.renderLrf(oname)
    except UnicodeDecodeError:
        raise ConversionError(path + ' is not encoded in ' + \
                              options.encoding +'. Specify the '+ \
                              'correct encoding with the -e option.')
    return os.path.abspath(oname)
 if __name__ == '__main__':
    main()
--- a/src/libprs500/ebooks/markdown/init.py
+++ b/src/libprs500/ebooks/markdown/init.py
@ -0,0 +1,5 @@
 ''' Package defines lightweight markup language for processing of txt files'''
 # Initialize extensions
 from libprs500.ebooks.markdown import mdx_footnotes
 from libprs500.ebooks.markdown import mdx_tables
 from libprs500.ebooks.markdown import mdx_toc
--- a/src/libprs500/ebooks/markdown/markdown.py
+++ b/src/libprs500/ebooks/markdown/markdown.py
--- a/src/libprs500/ebooks/markdown/mdx_footnotes.py
+++ b/src/libprs500/ebooks/markdown/mdx_footnotes.py
@ -0,0 +1,255 @@
 """
 ## To see this file as plain text go to
 ## http://freewisdom.org/projects/python-markdown/mdx_footnotes.raw_content
 ========================= FOOTNOTES =================================
 This section adds footnote handling to markdown.  It can be used as
 an example for extending python-markdown with relatively complex
 functionality.  While in this case the extension is included inside
 the module itself, it could just as easily be added from outside the
 module.  Note that all markdown classes above are ignorant about
 footnotes.  All footnote functionality is provided separately and
 then added to the markdown instance at the run time.
 Footnote functionality is attached by calling extendMarkdown()
 method of FootnoteExtension.  The method also registers the
 extension to allow its state to be reset by a call to reset()
 method.
 """
 FN_BACKLINK_TEXT = "zz1337820767766393qq"
 import re, markdown, random
 class FootnoteExtension (markdown.Extension):
    """Markdown extension that adds footnote handling.
    Footnote definitions (matched by DEF_RE) are collected by a preprocessor,
    footnote marks (matched by SHORT_USE_RE) are rendered by an inline
    pattern, and the collected footnotes are written out as a
    <div class="footnote"> by a postprocessor.
    """
    DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
    SHORT_USE_RE = re.compile(r'\[\^([^\]]*)\]', re.M) # [^a]
    def __init__ (self, configs) :
        """@param configs: iterable of (key, value) pairs overriding entries
                           of self.config; None means no overrides."""
        self.config = {'PLACE_MARKER' :
                       ["///Footnotes Go Here///",
                        "The text string that marks where the footnotes go"]}
        # configs may be None (it is makeExtension's default argument)
        for key, value in configs or [] :
            self.config[key][0] = value
        self.reset()
    def extendMarkdown(self, md, md_globals) :
        """Hook the footnote pre-, inline- and post-processors into md."""
        self.md = md
        # Stateless extensions do not need to be registered
        md.registerExtension(self)
        # Insert a preprocessor before ReferencePreprocessor
        index = md.preprocessors.index(md_globals['REFERENCE_PREPROCESSOR'])
        preprocessor = FootnotePreprocessor(self)
        preprocessor.md = md
        md.preprocessors.insert(index, preprocessor)
        # Insert an inline pattern before ImageReferencePattern
        FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
        index = md.inlinePatterns.index(md_globals['IMAGE_REFERENCE_PATTERN'])
        md.inlinePatterns.insert(index, FootnotePattern(FOOTNOTE_RE, self))
        # Insert a post-processor that would actually add the footnote div
        postprocessor = FootnotePostprocessor(self)
        postprocessor.extension = self
        md.postprocessors.append(postprocessor)
        textPostprocessor = FootnoteTextPostprocessor(self)
        md.textPostprocessors.append(textPostprocessor)
    def reset(self) :
        """Forget all recorded footnotes and pick a new id suffix."""
        # May be called by Markdown if state reset is desired
        self.footnote_suffix = "-" + str(int(random.random()*1000000000))
        self.used_footnotes={}
        self.footnotes = {}
    def findFootnotesPlaceholder(self, doc) :
        """Return the first text node containing PLACE_MARKER, or None."""
        def findFootnotePlaceholderFn(node=None, indent=0):
            if node.type == 'text':
                if node.value.find(self.getConfig("PLACE_MARKER")) > -1 :
                    return True
        fn_div_list = doc.find(findFootnotePlaceholderFn)
        if fn_div_list :
            return fn_div_list[0]
    def setFootnote(self, id, text) :
        """Store the text of the footnote definition with the given id."""
        self.footnotes[id] = text
    def makeFootnoteId(self, num) :
        """Return the element id of footnote number num."""
        return 'fn%d%s' % (num, self.footnote_suffix)
    def makeFootnoteRefId(self, num) :
        """Return the element id of the in-text mark of footnote num."""
        return 'fnr%d%s' % (num, self.footnote_suffix)
    def makeFootnotesDiv (self, doc) :
        """Creates the div with class='footnote' and populates it with
           the text of the footnotes.
           @returns: the footnote div as a dom element, or None when
                     there is no referenced footnote to render """
        # Pair every footnote with its use number so the list can be sorted
        # into display order.  A footnote that was defined but never
        # referenced has no number (looking it up raised KeyError before)
        # and nothing could link to it, so it is left out.
        footnotes = [(self.used_footnotes[id], id)
                     for id in self.footnotes.keys()
                     if id in self.used_footnotes]
        if not footnotes :
            return None
        footnotes.sort()
        div = doc.createElement("div")
        div.setAttribute('class', 'footnote')
        hr = doc.createElement("hr")
        div.appendChild(hr)
        ol = doc.createElement("ol")
        div.appendChild(ol)
        for i, id in footnotes :
            li = doc.createElement('li')
            li.setAttribute('id', self.makeFootnoteId(i))
            self.md._processSection(li, self.footnotes[id].split("\n"))
            #li.appendChild(doc.createTextNode(self.footnotes[id]))
            backlink = doc.createElement('a')
            backlink.setAttribute('href', '#' + self.makeFootnoteRefId(i))
            backlink.setAttribute('class', 'footnoteBackLink')
            # Use the footnote's own number; this was hard-coded to 1
            backlink.setAttribute('title',
                                  'Jump back to footnote %d in the text' % i)
            backlink.appendChild(doc.createTextNode(FN_BACKLINK_TEXT))
            if li.childNodes :
                # Put the backlink inside the last child element, or directly
                # in the <li> when the last child is a text node
                node = li.childNodes[-1]
                if node.type == "text" :
                    node = li
                node.appendChild(backlink)
            ol.appendChild(li)
        return div
 class FootnotePreprocessor :
    """Preprocessor that removes footnote definitions from the source lines
    and records the footnote marks found in the remaining text."""
    def __init__ (self, footnotes) :
        self.footnotes = footnotes
    def run(self, lines) :
        """Strip footnote definitions from lines, record every footnote mark
        in the remaining text and return that text as a list of lines."""
        self.blockGuru = markdown.BlockGuru()
        remaining = self._handleFootnoteDefinitions (lines)
        joined = "\n".join(remaining)
        # sub() is used only for its per-match callback, which notes the
        # order in which footnote marks occur; its result is discarded.
        self.footnotes.SHORT_USE_RE.sub(self.recordFootnoteUse, joined)
        return joined.split("\n")
    def recordFootnoteUse(self, match) :
        """Assign the next footnote number to the id captured by match."""
        # NOTE(review): an id that is matched again is given a new, higher
        # number -- confirm this is intended.
        key = match.group(1).strip()
        registry = self.footnotes.used_footnotes
        registry[key] = len(registry.keys()) + 1
    def _handleFootnoteDefinitions(self, lines) :
        """Recursively finds all footnote definitions in the lines.
            @param lines: a list of lines of text
            @returns: the list of lines with each footnote definition
                      replaced by a single empty line """
        i, id, footnote = self._findFootnoteDefinition(lines)
        if not id :
            return lines
        before = lines[:i]
        # Tabbed lines right after the definition line belong to the footnote
        detabbed, theRest = self.blockGuru.detectTabbed(lines[i+1:])
        body = footnote + "\n" + "\n".join(detabbed)
        self.footnotes.setFootnote(id, body)
        after = self._handleFootnoteDefinitions(theRest)
        return before + [""] + after
    def _findFootnoteDefinition(self, lines) :
        """Finds the first line of a footnote definition.
            @param lines: a list of lines of text
            @returns: a tuple (index, id, text) for the first definition
                      line, or (number of lines, None, None) if none """
        index = -1
        for index, line in enumerate(lines) :
            found = self.footnotes.DEF_RE.match(line)
            if found :
                return index, found.group(2), found.group(3)
        return index + 1, None, None
 class FootnotePattern (markdown.Pattern) :
    """Inline pattern that turns a footnote mark into a superscript link
    pointing at the footnote."""
    def __init__ (self, pattern, footnotes) :
        markdown.Pattern.__init__(self, pattern)
        self.footnotes = footnotes
    def handleMatch(self, m, doc) :
        """Build <sup id=ref-id><a href=#footnote-id>N</a></sup> for m."""
        marker = doc.createElement('sup')
        link = doc.createElement('a')
        marker.appendChild(link)
        ext = self.footnotes
        # Number recorded for this footnote id by the preprocessor
        number = ext.used_footnotes[m.group(2)]
        marker.setAttribute('id', ext.makeFootnoteRefId(number))
        link.setAttribute('href', '#' + ext.makeFootnoteId(number))
        link.appendChild(doc.createTextNode(str(number)))
        return marker
 class FootnotePostprocessor (markdown.Postprocessor):
    """Postprocessor that adds the footnotes div to the document."""
    def __init__ (self, footnotes) :
        self.footnotes = footnotes
        # run() reaches the placeholder lookup through self.extension.  It
        # used to exist only if the creator assigned it afterwards; default
        # it here so a freshly constructed instance is usable on its own.
        self.extension = footnotes
    def run(self, doc) :
        """Put the footnotes div in place of the placeholder text node if
        there is one, otherwise append it to the document element."""
        footnotesDiv = self.footnotes.makeFootnotesDiv(doc)
        if footnotesDiv :
            fnPlaceholder = self.extension.findFootnotesPlaceholder(doc)
            if fnPlaceholder :
                fnPlaceholder.parent.replaceChild(fnPlaceholder, footnotesDiv)
            else :
                doc.documentElement.appendChild(footnotesDiv)
 class FootnoteTextPostprocessor (markdown.Postprocessor):
    """Text postprocessor that swaps the backlink marker text for the
    character reference used as the visible backlink."""
    def __init__ (self, footnotes) :
        self.footnotes = footnotes
    def run(self, text) :
        """Return text with every FN_BACKLINK_TEXT occurrence replaced."""
        backlink_entity = "&#8617;"
        return text.replace(FN_BACKLINK_TEXT, backlink_entity)
 def makeExtension(configs=None) :
    """Create the footnote extension.
    @param configs: iterable of (key, value) configuration overrides, or
                    None for no overrides.
    """
    # The extension iterates over configs, so never hand it the None default
    if configs is None :
        configs = []
    return FootnoteExtension(configs=configs)
--- a/src/libprs500/ebooks/markdown/mdx_tables.py
+++ b/src/libprs500/ebooks/markdown/mdx_tables.py
@ -0,0 +1,65 @@
 #!/usr/bin/env python
 """
 Table extension for Python-Markdown
 """
 import markdown
 class TablePattern(markdown.Pattern) :
 	"""Inline pattern that turns a line of the form |a|b|c| into a <tr>."""
 	def __init__ (self, md):
 		markdown.Pattern.__init__(self, r'^\|([^\n]*)\|(\n|$)')
 		self.md = md
 	def handleMatch(self, m, doc) :
 		"""Build a <tr> element with one <td>/<th> per pipe-separated chunk."""
 		# a single line represents a row
 		row = doc.createElement('tr')
 		row.appendChild(doc.createTextNode('\n'))
 		# chunks between pipes represent cells
 		for chunk in m.group(2).split('|'):
 			is_header = len(chunk) >= 2 and chunk.startswith('*') and chunk.endswith('*')
 			if is_header:
 				# a cell bounded by asterisks is a <th>; drop the asterisks
 				cell = doc.createElement('th')
 				chunk = chunk[1:-1]
 			else:
 				# otherwise it is a <td>
 				cell = doc.createElement('td')
 			# apply inline patterns on the chunk
 			for piece in self.md._handleInline(chunk):
 				if(type(piece) == unicode):
 					# plain text comes back as unicode and needs a text node
 					piece = doc.createTextNode(piece)
 				cell.appendChild(piece)
 			row.appendChild(cell)
 			# very long lines are evil
 			row.appendChild(doc.createTextNode('\n'))
 		return row
 class TablePostprocessor:
 	"""Postprocessor that renames a <p> whose first significant child is a
 	<tr> to <table>."""
 	def run(self, doc):
 		"""Rename, in place, each matching <p> element of doc to 'table'."""
 		# markdown wrapped our <tr>s in a <p>, we fix that here
 		def is_paragraph(candidate):
 			return candidate.type == 'element' and candidate.nodeName == 'p'
 		# replace "p > tr" with "table > tr"
 		for para in doc.find(is_paragraph):
 			first = None
 			for child in para.childNodes:
 				# leading whitespace-only text nodes are skipped
 				if not (child.type == 'text' and child.value.strip() == ''):
 					first = child
 					break
 			# only the first significant child decides
 			if first is not None and first.type == 'element' and first.nodeName == 'tr':
 				para.nodeName = 'table'
 class TableExtension(markdown.Extension):
 	"""Markdown extension that registers the table row pattern and the
 	postprocessor that fixes up the wrapping element."""
 	def extendMarkdown(self, md, md_globals):
 		"""Register the row pattern first in md.inlinePatterns and append
 		the table postprocessor to md.postprocessors."""
 		row_pattern = TablePattern(md)
 		md.inlinePatterns.insert(0, row_pattern)
 		fixer = TablePostprocessor()
 		md.postprocessors.append(fixer)
 def makeExtension(configs=None):
 	"""Create the table extension.
 	configs is passed through unchanged to TableExtension; it defaults to
 	None so this factory can be called without arguments, like the footnote
 	and toc factories.
 	"""
 	# NOTE(review): assumes markdown.Extension accepts None as its
 	# configuration -- confirm against the bundled markdown module.
 	return TableExtension(configs)
--- a/src/libprs500/ebooks/markdown/mdx_toc.py
+++ b/src/libprs500/ebooks/markdown/mdx_toc.py
@ -0,0 +1,165 @@
 ## To access this file as plain text go to
 ## http://freewisdom.org/projects/python-markdown/mdx_toc.raw_content
 """
 Chris Clark - clach04 -at- sf.net
 My markdown extensions for adding:
    Table of Contents (aka toc)
 """
 import os
 import sys
 import re
 import markdown
 DEFAULT_TITLE = None
 def extract_alphanumeric(in_str=None):
    """Title-case in_str and return only its alphanumeric characters,
    joined into a single string.
    """
    # Characters are kept according to isalnum() of the title-cased text
    title_cased = in_str.title()
    return ''.join([ch for ch in title_cased if ch.isalnum()])
 class TitlePostprocessor (markdown.Postprocessor):
    """Postprocessor that inserts the element returned by the extension's
    createTitle() as the first child of the document element."""
    def __init__ (self, extension) :
        self.extension = extension
    def run(self, doc) :
        """Insert the title element, if createTitle(doc) returns one."""
        # NOTE(review): the extension object must provide createTitle();
        # the toc extension class in this module does not define it.
        title = self.extension.createTitle(doc)
        if not title :
            return
        doc.documentElement.insertChild(0, title)
 class TocExtension (markdown.Extension):
    """Markdown extension: generate a Table Of Contents (aka toc)
    toc is returned in a div tag with class='toc'
    toc is either:
        appended to end of document
      OR 
        replaces the first text node containing "///Table of Contents///"
    """
    def __init__ (self) :
        #maybe add these as parameters to the class init?
        # Text that marks where the toc should be placed
        self.TOC_INCLUDE_MARKER = "///Table of Contents///"
        # Heading text of the toc; a false value suppresses the heading
        self.TOC_TITLE = "Table Of Contents"
        # Only headers of this level (h2) get an entry in the toc
        self.auto_toc_heading_type=2
        # Level of the header element (h3) that holds TOC_TITLE
        self.toc_heading_type=3
    def extendMarkdown(self, md, md_globals) :
        """Append the toc postprocessor to md.postprocessors."""
        # Just insert in the end
        md.postprocessors.append(TocPostprocessor(self))
        # Stateless extensions do not need to be registered, so we don't
        # register.
    def findTocPlaceholder(self, doc) :
        """Return the first text node containing TOC_INCLUDE_MARKER,
        or None if there is none."""
        def findTocPlaceholderFn(node=None, indent=0):
            if node.type == 'text':
                if node.value.find(self.TOC_INCLUDE_MARKER) > -1 :
                    return True
        toc_div_list = doc.find(findTocPlaceholderFn)
        if toc_div_list :
            return toc_div_list[0]
    def createTocDiv(self, doc) :
        """
           Creates Table Of Contents based on headers.
           As a side effect an <a name=...> anchor is appended to every
           header whose title is not blank.
           @returns: toc as a single dom element in a <div> tag with
                     class='toc', or None if no such header was found
        """
        # Find headers
        headers_compiled_re = re.compile("h[123456]", re.IGNORECASE)
        def findHeadersFn(element=None):
            if element.type=='element':
                if headers_compiled_re.match(element.nodeName):
                    return True
        headers_doc_list = doc.find(findHeadersFn)
        # Insert anchor tags into dom
        generated_anchor_id=0
        headers_list=[]
        for element in headers_doc_list:
            # NOTE(review): assumes the header's first child is a text
            # node -- confirm for headers that start with inline markup.
            heading_title = element.childNodes[0].value
            if heading_title.strip() !="":
                heading_type = int(element.nodeName[-1:])
                # The running counter keeps anchor names unique even when
                # two headers have the same title
                html_anchor_name= (extract_alphanumeric(heading_title)
                                   +'__MD_autoTOC_%d' % (generated_anchor_id))
                # insert anchor tag inside header tags
                html_anchor = doc.createElement("a")
                html_anchor.setAttribute('name', html_anchor_name)
                element.appendChild(html_anchor)
                headers_list.append( (heading_type, heading_title,
                                      html_anchor_name) )
                generated_anchor_id = generated_anchor_id + 1
        if not headers_list:
            return None
        # create dom for toc
        # Create list
        toc_doc_list = doc.createElement("ul")
        for (heading_type, heading_title, html_anchor_name) in headers_list:
            if heading_type == self.auto_toc_heading_type:
                toc_doc_entry = doc.createElement("li")
                toc_doc_link = doc.createElement("a")
                toc_doc_link.setAttribute('href', '#'+html_anchor_name)
                toc_doc_text = doc.createTextNode(heading_title)
                toc_doc_link.appendChild(toc_doc_text)
                toc_doc_entry.appendChild(toc_doc_link)
                toc_doc_list.appendChild(toc_doc_entry)
        # Put list into div            
        div = doc.createElement("div")
        div.setAttribute('class', 'toc')
        if self.TOC_TITLE:
            toc_header = doc.createElement("h%d"%(self.toc_heading_type) )
            toc_header_text = doc.createTextNode(self.TOC_TITLE)
            toc_header.appendChild(toc_header_text)
            div.appendChild(toc_header)
        div.appendChild(toc_doc_list)
        return div
 class TocPostprocessor (markdown.Postprocessor):
    """Postprocessor that places the generated table of contents in the
    document."""
    def __init__ (self, toc) :
        self.toc = toc
    def run(self, doc):
        """Replace the placeholder text node with the toc div, or append the
        div to the document element when there is no placeholder."""
        # The placeholder is looked up before the toc is generated
        placeholder = self.toc.findTocPlaceholder(doc)
        toc_div = self.toc.createTocDiv(doc)
        if not toc_div:
            return
        if placeholder :
            # Replace "magic" pattern with toc
            placeholder.parent.replaceChild(placeholder, toc_div)
        else :
            # Dump at the end of the DOM
            # Probably want to use CSS to position div
            doc.documentElement.appendChild(toc_div)
 def makeExtension(configs=None) :
    """Create the toc extension; configs is accepted but not used."""
    extension = TocExtension()
    return extension
--- a/src/libprs500/ptempfile.py
+++ b/src/libprs500/ptempfile.py
@ -47,7 +47,7 @@ class _TemporaryFileWrapper(object):
            os.remove(self.name)
-def PersistentTemporaryFile(suffix="", prefix=""):
+def PersistentTemporaryFile(suffix="", prefix="", dir=None):
    """ 
    Return a temporary file that is available even after being closed on
    all platforms. It is automatically deleted when this object is deleted.
@ -55,6 +55,7 @@ def PersistentTemporaryFile(suffix="", prefix=""):
    """
    if prefix == None: 
        prefix = ""
-    fd, name = tempfile.mkstemp(suffix, "libprs500_"+ __version__+"_" + prefix)
+    fd, name = tempfile.mkstemp(suffix, "libprs500_"+ __version__+"_" + prefix,
                                dir=dir)
    _file = os.fdopen(fd, "wb")
    return _TemporaryFileWrapper(_file, name)    
--- a/upload.py
+++ b/upload.py
@ -9,6 +9,7 @@ PREFIX = "/var/www/vhosts/kovidgoyal.net/subdomains/libprs500"
 DOWNLOADS = PREFIX+"/httpdocs/downloads"
 DOCS = PREFIX+"/httpdocs/apidocs"
 HTML2LRF = "src/libprs500/ebooks/lrf/html/demo"
 TXT2LRF  = "src/libprs500/ebooks/lrf/txt/demo"
 check_call = partial(_check_call, shell=True)
 h = Host(hostType=VIX_SERVICEPROVIDER_VMWARE_WORKSTATION)
@ -19,7 +20,7 @@ def build_windows():
-    vm = h.openVM('/mnt/extra/vmware/Windows Vista/Windows Vista.vmx')
+    vm = h.openVM('/mnt/backup/vmware/Windows Vista/Windows Vista.vmx')
    vm.powerOn() 
    if not vm.waitForToolsInGuest():
        print >>sys.stderr, 'Windows is not booting up'
@ -43,7 +44,7 @@ def build_osx():
    if os.path.exists('dist/dmgdone'):
        os.unlink('dist/dmgdone')
-    vm = h.openVM('/mnt/extra/vmware/Mac OSX/Mac OSX.vmx')
+    vm = h.openVM('/mnt/backup/vmware/Mac OSX/Mac OSX.vmx')
    vm.powerOn()
    c = 25 * 60
    print 'Waiting (minutes):',
@ -69,6 +70,8 @@ def upload_demo():
    f.close()
    check_call('''html2lrf --title='Demonstration of html2lrf' --author='Kovid Goyal' --header --output=/tmp/html2lrf.lrf %s/demo.html'''%(HTML2LRF,))
    check_call('''scp /tmp/html2lrf.lrf castalia:%s/'''%(DOWNLOADS,))
    check_call('''txt2lrf -t 'Demonstration of txt2lrf' -a 'Kovid Goyal' --header -o /tmp/txt2lrf.lrf %s/demo.txt'''%(TXT2LRF,) )
    check_call('''scp /tmp/txt2lrf.lrf castalia:%s/'''%(DOWNLOADS,))
 def upload_installers(exe, dmg):
    check_call('''ssh castalia rm -f %s/libprs500\*.exe'''%(DOWNLOADS,))