diff --git a/setup.py b/setup.py index fd50aa9550..144aa42eec 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,7 @@ entry_points = { 'rtf-meta = libprs500.ebooks.metadata.rtf:main', \ 'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main', \ 'html2lrf = libprs500.ebooks.lrf.html.convert_from:main',\ + 'markdown = libprs500.ebooks.markdown.markdown:main',\ ], 'gui_scripts' : [ APPNAME+' = libprs500.gui.main:main'] } diff --git a/src/libprs500/__init__.py b/src/libprs500/__init__.py index ebac7179d9..354d9bb46e 100644 --- a/src/libprs500/__init__.py +++ b/src/libprs500/__init__.py @@ -13,7 +13,7 @@ ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ''' E-book management software''' -__version__ = "0.3.52" +__version__ = "0.3.53" __docformat__ = "epytext" __author__ = "Kovid Goyal " __appname__ = 'libprs500' diff --git a/src/libprs500/ebooks/lrf/__init__.py b/src/libprs500/ebooks/lrf/__init__.py index 308be6c135..0dd5d78b07 100644 --- a/src/libprs500/ebooks/lrf/__init__.py +++ b/src/libprs500/ebooks/lrf/__init__.py @@ -70,9 +70,19 @@ def option_parser(usage): help='Sort key for the author') metadata.add_option('--publisher', action='store', default='Unknown', dest='publisher', help='Publisher') - profiles=['prs500'] + profiles=['prs500'] parser.add_option('-o', '--output', action='store', default=None, \ help='Output file name. Default is derived from input filename') + laf = parser.add_option_group('LOOK AND FEEL') + laf.add_option('--cover', action='store', dest='cover', default=None, \ + help='Path to file containing image to be used as cover') + laf.add_option('--font-delta', action='store', type='float', default=0., \ + help="""Increase the font size by 2 * FONT_DELTA pts and """ + '''the line spacing by FONT_DELTA pts. 
FONT_DELTA can be a fraction.''' + """If FONT_DELTA is negative, the font size is decreased.""", + dest='font_delta') + laf.add_option('--disable-autorotation', action='store_true', default=False, + help='Disable autorotation of images.', dest='disable_autorotation') page = parser.add_option_group('PAGE OPTIONS') page.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice', choices=profiles, action='callback', callback=profile_from_string, diff --git a/src/libprs500/ebooks/lrf/html/convert_from.py b/src/libprs500/ebooks/lrf/html/convert_from.py index 4cf107baee..714d7e05d1 100644 --- a/src/libprs500/ebooks/lrf/html/convert_from.py +++ b/src/libprs500/ebooks/lrf/html/convert_from.py @@ -221,6 +221,7 @@ class HTMLConverter(object): chapter_regex=re.compile('chapter|book|appendix', re.IGNORECASE), link_exclude=re.compile('$'), page_break=re.compile('h[12]', re.IGNORECASE), + force_page_break=re.compile('$', re.IGNORECASE), profile=PRS500_PROFILE, disable_autorotation=False): ''' @@ -273,7 +274,8 @@ class HTMLConverter(object): small = {'font-size' :'small'}, pre = {'font-family' :'monospace' }, tt = {'font-family' :'monospace'}, - center = {'text-align' : 'center'} + center = {'text-align' : 'center'}, + th = {'font-size':'large', 'font-weight':'bold'}, ) self.profile = profile #: Defines the geometry of the display device self.chapter_detection = chapter_detection #: Flag to toggle chapter detection @@ -287,7 +289,8 @@ class HTMLConverter(object): self.blockquote_style = book.create_block_style(sidemargin=60, topskip=20, footskip=20) self.unindented_style = book.create_text_style(parindent=0) - self.page_break = page_break #: Regex controlling forced page-break behavior + self.page_break = page_break #: Regex controlling page-break behavior + self.force_page_break = force_page_break #: Regex controlling forced page-break behavior self.text_styles = []#: Keep track of already used textstyles self.block_styles = []#: Keep track of already 
used blockstyles self.images = {} #: Images referenced in the HTML document @@ -559,6 +562,7 @@ class HTMLConverter(object): chapter_regex=self.chapter_regex, link_exclude=self.link_exclude, page_break=self.page_break, + force_page_break=self.force_page_break, disable_autorotation=self.disable_autorotation) HTMLConverter.processed_files[path] = self.files[path] except Exception: @@ -829,6 +833,9 @@ class HTMLConverter(object): tag_css['page-break-after'].lower() != 'avoid': end_page = True tag_css.pop('page-break-after') + if self.force_page_break.match(tagname): + self.end_page() + self.page_break_found = True if not self.page_break_found and self.page_break.match(tagname): if len(self.current_page.contents) > 3: self.end_page() @@ -956,6 +963,7 @@ class HTMLConverter(object): except ConversionError: pass self.end_current_block() + self.current_block = self.book.create_text_block() elif tagname in ['ul', 'ol']: self.in_ol = 1 if tagname == 'ol' else 0 self.end_current_block() @@ -1138,13 +1146,15 @@ def process_file(path, options): re.compile('$') pb = re.compile(options.page_break, re.IGNORECASE) if options.page_break else \ re.compile('$') + fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \ + re.compile('$') conv = HTMLConverter(book, path, profile=options.profile, font_delta=options.font_delta, cover=cpath, max_link_levels=options.link_levels, verbose=options.verbose, baen=options.baen, chapter_detection=options.chapter_detection, chapter_regex=re.compile(options.chapter_regex, re.IGNORECASE), - link_exclude=re.compile(le), page_break=pb, + link_exclude=re.compile(le), page_break=pb, force_page_break=fpb, disable_autorotation=options.disable_autorotation) conv.process_links() oname = options.output @@ -1220,23 +1230,14 @@ def try_opf(path, options): -def parse_options(argv=None, cli=True): +def parse_options(argv=None, cli=True, parser=None): """ CLI for html -> lrf conversions """ if not argv: argv = sys.argv[1:] - 
parser = option_parser("""usage: %prog [options] mybook.[html|rar|zip] + if not parser: + parser = option_parser("""usage: %prog [options] mybook.[html|rar|zip] %prog converts mybook.html to mybook.lrf""") - laf = parser.add_option_group('LOOK AND FEEL') - laf.add_option('--cover', action='store', dest='cover', default=None, \ - help='Path to file containing image to be used as cover') - laf.add_option('--font-delta', action='store', type='float', default=0., \ - help="""Increase the font size by 2 * FONT_DELTA pts and """ - '''the line spacing by FONT_DELTA pts. FONT_DELTA can be a fraction.''' - """If FONT_DELTA is negative, the font size is decreased.""", - dest='font_delta') - laf.add_option('--disable-autorotation', action='store_true', default=False, - help='Disable autorotation of images.', dest='disable_autorotation') link = parser.add_option_group('LINK PROCESSING OPTIONS') link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \ dest='link_levels', @@ -1265,6 +1266,8 @@ def parse_options(argv=None, cli=True): '''there are no really long pages as this degrades the page ''' '''turn performance of the LRF. 
Thus this option is ignored ''' '''if the current page has only a few elements.''') + chapter.add_option('--force-page-break-before', dest='force_page_break', + default='$', help='Like --page-break-before, but page breaks are forced.') prepro = parser.add_option_group('PREPROCESSING OPTIONS') prepro.add_option('--baen', action='store_true', default=False, dest='baen', help='''Preprocess Baen HTML files to improve generated LRF.''') @@ -1285,7 +1288,8 @@ def main(): if options.verbose: import warnings warnings.defaultaction = 'error' - except: + except Exception, err: + print >> sys.stderr, err sys.exit(1) process_file(src, options) diff --git a/src/libprs500/ebooks/lrf/html/table.py b/src/libprs500/ebooks/lrf/html/table.py index 23450de888..74b8e4a8a4 100644 --- a/src/libprs500/ebooks/lrf/html/table.py +++ b/src/libprs500/ebooks/lrf/html/table.py @@ -12,7 +12,7 @@ ## You should have received a copy of the GNU General Public License along ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-import math, sys +import math, sys, re from libprs500.ebooks.lrf.fonts import get_font from libprs500.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \ @@ -215,7 +215,7 @@ class Row(object): def __init__(self, conv, row, css, colpad): self.cells = [] self.colpad = colpad - cells = row.findAll('td') + cells = row.findAll(re.compile('td|th')) for cell in cells: ccss = conv.tag_css(cell, css) self.cells.append(Cell(conv, cell, ccss)) diff --git a/src/libprs500/ebooks/lrf/txt/convert_from.py b/src/libprs500/ebooks/lrf/txt/convert_from.py index 19a3dcce47..84d81dd479 100644 --- a/src/libprs500/ebooks/lrf/txt/convert_from.py +++ b/src/libprs500/ebooks/lrf/txt/convert_from.py @@ -15,19 +15,17 @@ """ Convert .txt files to .lrf """ -import os, sys +import os, sys, codecs -from libprs500.ebooks import BeautifulSoup -from libprs500.ebooks.lrf import ConversionError, option_parser -from libprs500.ebooks.lrf import Book -from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, Italic, Bold, BookSetting -from libprs500 import filename_to_utf8 from libprs500 import iswindows +from libprs500.ptempfile import PersistentTemporaryFile +from libprs500.ebooks.lrf import ConversionError, option_parser +from libprs500.ebooks.lrf.html.convert_from import parse_options as html_parse_options +from libprs500.ebooks.lrf.html.convert_from import process_file +from libprs500.ebooks.markdown import markdown -def parse_options(argv=None, cli=True): +def parse_options(cli=True): """ CLI for txt -> lrf conversions """ - if not argv: - argv = sys.argv[1:] parser = option_parser( """usage: %prog [options] mybook.txt @@ -44,84 +42,78 @@ def parse_options(argv=None, cli=True): if cli: parser.print_help() raise ConversionError, 'no filename specified' - if options.title == None: - options.title = filename_to_utf8(os.path.splitext(os.path.basename(args[0]))[0]) return options, args, parser +def generate_html(txtfile, encoding): + ''' + Convert txtfile to html and return a PersistentTemporaryFile 
object pointing + to the file with the HTML. + ''' + encodings = ['iso-8859-1', 'koi8_r', 'koi8_u', 'utf8'] + if iswindows: + encodings = ['cp1252'] + encodings + if encoding not in ['cp1252', 'utf8']: + encodings = [encoding] + encodings + txt, enc = None, None + for encoding in encodings: + try: + txt = codecs.open(txtfile, 'rb', encoding).read() + except UnicodeDecodeError: + continue + enc = encoding + break + if txt == None: + raise ConversionError, 'Could not detect encoding of %s'%(txtfile,) + md = markdown.Markdown(txt, + extensions=['footnotes', 'tables', 'toc'], + encoding=enc, + safe_mode=False, + ) + html = md.toString().decode(enc) + p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile)) + p.close() + codecs.open(p.name, 'wb', enc).write(html) + return p + def main(): try: options, args, parser = parse_options() - src = os.path.abspath(os.path.expanduser(args[0])) - except: - sys.exit(1) - print 'Output written to ', convert_txt(src, options) + txt = os.path.abspath(os.path.expanduser(args[0])) + p = generate_html(txt, options.encoding) + for i in range(1, len(sys.argv)): + if sys.argv[i] == args[0]: + sys.argv.remove(sys.argv[i]) + break + sys.argv.append(p.name) + sys.argv.append('--force-page-break-before') + sys.argv.append('h2') + o_spec = False + for arg in sys.argv[1:]: + arg = arg.lstrip() + if arg.startswith('-o') or arg.startswith('--output'): + o_spec = True + break + ext = '.lrf' + for arg in sys.argv[1:]: + if arg.strip() == '--lrs': + ext = '.lrs' + break + if not o_spec: + sys.argv.append('-o') + sys.argv.append(os.path.splitext(os.path.basename(txt))[0]+ext) + options, args, parser = html_parse_options(parser=parser) + src = args[0] + if options.verbose: + import warnings + warnings.defaultaction = 'error' + except Exception, err: + print >> sys.stderr, err + import traceback + traceback.print_exc() + sys.exit(1) + process_file(src, options) -def convert_txt(path, options): - """ - Convert the text file at C{path} into an 
lrf file. - @param options: Object with the following attributes: - C{author}, C{title}, C{encoding} (the assumed encoding of - the text in C{path}.) - """ - import codecs - header = None - if options.header: - header = Paragraph() - header.append(Bold(options.title)) - header.append(' by ') - header.append(Italic(options.author)) - title = (options.title, options.title_sort) - author = (options.author, options.author_sort) - book = Book(options, header=header, title=title, author=author, \ - publisher=options.publisher, - sourceencoding=options.encoding, freetext=options.freetext, \ - category=options.category, booksetting=BookSetting - (dpi=10*options.profile.dpi, - screenheight=options.profile.screen_height, - screenwidth=options.profile.screen_width)) - buffer = '' - pg = book.create_page() - block = book.create_text_block() - pg.append(block) - book.append(pg) - lines = "" - try: - lines = codecs.open(path, 'rb', options.encoding).readlines() - except UnicodeDecodeError: - try: - lines = codecs.open(path, 'rb', 'cp1252').readlines() - except UnicodeDecodeError: - try: - lines = codecs.open(path, 'rb', 'iso-8859-1').readlines() - except UnicodeDecodeError: - try: - lines = codecs.open(path, 'rb', 'koi8_r').readlines() - except UnicodeDecodeError: - try: - lines = codecs.open(path, 'rb', 'koi8_u').readlines() - except UnicodeDecodeError: - lines = codecs.open(path, 'rb', 'utf8').readlines() - for line in lines: - line = line.strip() - if line: - buffer = buffer.rstrip() + ' ' + line - else: - block.Paragraph(buffer) - buffer = '' - basename = os.path.basename(path) - oname = options.output - if not oname: - oname = os.path.splitext(basename)[0]+('.lrs' if options.lrs else '.lrf') - oname = os.path.abspath(os.path.expanduser(oname)) - try: - book.renderLrs(oname) if options.lrs else book.renderLrf(oname) - except UnicodeDecodeError: - raise ConversionError(path + ' is not encoded in ' + \ - options.encoding +'. 
Specify the '+ \ - 'correct encoding with the -e option.') - return os.path.abspath(oname) - if __name__ == '__main__': main() \ No newline at end of file diff --git a/src/libprs500/ebooks/markdown/__init__.py b/src/libprs500/ebooks/markdown/__init__.py new file mode 100644 index 0000000000..98ad30583d --- /dev/null +++ b/src/libprs500/ebooks/markdown/__init__.py @@ -0,0 +1,5 @@ +''' Package defines lightweight markup language for processing of txt files''' +# Initialize extensions +from libprs500.ebooks.markdown import mdx_footnotes +from libprs500.ebooks.markdown import mdx_tables +from libprs500.ebooks.markdown import mdx_toc \ No newline at end of file diff --git a/src/libprs500/ebooks/markdown/markdown.py b/src/libprs500/ebooks/markdown/markdown.py new file mode 100644 index 0000000000..b0996c2926 --- /dev/null +++ b/src/libprs500/ebooks/markdown/markdown.py @@ -0,0 +1,1671 @@ +#!/usr/bin/env python + +# The following constant specifies the name used in the usage +# statement displayed for python versions lower than 2.3. (With +# python2.3 and higher the usage statement is generated by optparse +# and uses the actual name of the executable called.) + +EXECUTABLE_NAME_FOR_USAGE = "python markdown.py" + +SPEED_TEST = 0 + +""" +==================================================================== +IF YOA ARE LOOKING TO EXTEND MARKDOWN, SEE THE "FOOTNOTES" SECTION +==================================================================== + +Python-Markdown +=============== + +Converts Markdown to HTML. Basic usage as a module: + + import markdown + html = markdown.markdown(your_text_string) + +Started by [Manfred Stienstra](http://www.dwerg.net/). Continued and +maintained by [Yuri Takhteyev](http://www.freewisdom.org). 
+ +Project website: http://www.freewisdom.org/projects/python-markdown +Contact: yuri [at] freewisdom.org + +License: GPL 2 (http://www.gnu.org/copyleft/gpl.html) or BSD + +Version: 1.5a (July 9, 2006) + +For changelog, see end of file +""" + +import re, sys, os, random, codecs + +# set debug level: 3 none, 2 critical, 1 informative, 0 all +(VERBOSE, INFO, CRITICAL, NONE) = range(4) + +MESSAGE_THRESHOLD = CRITICAL + +def message(level, text) : + if level >= MESSAGE_THRESHOLD : + print text + + +# --------------- CONSTANTS YOU MIGHT WANT TO MODIFY ----------------- + +# all tabs will be expanded to up to this many spaces +TAB_LENGTH = 4 +ENABLE_ATTRIBUTES = 1 +SMART_EMPHASIS = 1 + +# --------------- CONSTANTS YOU _SHOULD NOT_ HAVE TO CHANGE ---------- + +# a template for html placeholders +HTML_PLACEHOLDER_PREFIX = "qaodmasdkwaspemas" +HTML_PLACEHOLDER = HTML_PLACEHOLDER_PREFIX + "%dajkqlsmdqpakldnzsdfls" + +BLOCK_LEVEL_ELEMENTS = ['p', 'div', 'blockquote', 'pre', 'table', + 'dl', 'ol', 'ul', 'script', 'noscript', + 'form', 'fieldset', 'iframe', 'math', 'ins', + 'del', 'hr', 'hr/', 'style'] + +def is_block_level (tag) : + return ( (tag in BLOCK_LEVEL_ELEMENTS) or + (tag[0] == 'h' and tag[1] in "0123456789") ) + +""" +====================================================================== +========================== NANODOM =================================== +====================================================================== + +The three classes below implement some of the most basic DOM +methods. I use this instead of minidom because I need a simpler +functionality and do not want to require additional libraries. + +Importantly, NanoDom does not do normalization, which is what we +want. 
It also adds extra white space when converting DOM to string +""" + + +class Document : + + def appendChild(self, child) : + self.documentElement = child + child.parent = self + self.entities = {} + + def createElement(self, tag, textNode=None) : + el = Element(tag) + el.doc = self + if textNode : + el.appendChild(self.createTextNode(textNode)) + return el + + def createTextNode(self, text) : + node = TextNode(text) + node.doc = self + return node + + def createEntityReference(self, entity): + if entity not in self.entities: + self.entities[entity] = EntityReference(entity) + return self.entities[entity] + + def toxml (self) : + return self.documentElement.toxml() + + def normalizeEntities(self, text) : + + pairs = [ ("&", "&"), + ("<", "<"), + (">", ">"), + ("\"", """)] + + + for old, new in pairs : + text = text.replace(old, new) + return text + + def find(self, test) : + return self.documentElement.find(test) + + def unlink(self) : + self.documentElement.unlink() + self.documentElement = None + + +class Element : + + type = "element" + + def __init__ (self, tag) : + + self.nodeName = tag + self.attributes = [] + self.attribute_values = {} + self.childNodes = [] + + def unlink(self) : + for child in self.childNodes : + if child.type == "element" : + child.unlink() + self.childNodes = None + + def setAttribute(self, attr, value) : + if not attr in self.attributes : + self.attributes.append(attr) + + self.attribute_values[attr] = value + + def insertChild(self, position, child) : + self.childNodes.insert(position, child) + child.parent = self + + def removeChild(self, child) : + self.childNodes.remove(child) + + def replaceChild(self, oldChild, newChild) : + position = self.childNodes.index(oldChild) + self.removeChild(oldChild) + self.insertChild(position, newChild) + + def appendChild(self, child) : + self.childNodes.append(child) + child.parent = self + + def handleAttributes(self) : + pass + + def find(self, test, depth=0) : + """ Returns a list of descendants 
that pass the test function """ + matched_nodes = [] + for child in self.childNodes : + if test(child) : + matched_nodes.append(child) + if child.type == "element" : + matched_nodes += child.find(test, depth+1) + return matched_nodes + + def toxml(self): + if ENABLE_ATTRIBUTES : + for child in self.childNodes: + child.handleAttributes() + buffer = "" + if self.nodeName in ['h1', 'h2', 'h3', 'h4'] : + buffer += "\n" + elif self.nodeName in ['li'] : + buffer += "\n " + buffer += "<" + self.nodeName + for attr in self.attributes : + value = self.attribute_values[attr] + value = self.doc.normalizeEntities(value) + buffer += ' %s="%s"' % (attr, value) + if self.childNodes or self.nodeName in ['blockquote']: + buffer += ">" + for child in self.childNodes : + buffer += child.toxml() + if self.nodeName == 'p' : + buffer += "\n" + elif self.nodeName == 'li' : + buffer += "\n " + buffer += "" % self.nodeName + else : + buffer += "/>" + if self.nodeName in ['p', 'li', 'ul', 'ol', + 'h1', 'h2', 'h3', 'h4'] : + buffer += "\n" + + return buffer + + +class TextNode : + + type = "text" + attrRegExp = re.compile(r'\{@([^\}]*)=([^\}]*)}') # {@id=123} + + def __init__ (self, text) : + self.value = text + + def attributeCallback(self, match) : + self.parent.setAttribute(match.group(1), match.group(2)) + + def handleAttributes(self) : + self.value = self.attrRegExp.sub(self.attributeCallback, self.value) + + def toxml(self) : + text = self.value + if not text.startswith(HTML_PLACEHOLDER_PREFIX): + if self.parent.nodeName == "p" : + text = text.replace("\n", "\n ") + elif (self.parent.nodeName == "li" + and self.parent.childNodes[0]==self): + text = "\n " + text.replace("\n", "\n ") + text = self.doc.normalizeEntities(text) + return text + + +class EntityReference: + + type = "entity_ref" + + def __init__(self, entity): + self.entity = entity + + def handleAttributes(self): + pass + + def toxml(self): + return "&" + self.entity + ";" + + +""" 
+====================================================================== +========================== PRE-PROCESSORS ============================ +====================================================================== + +Preprocessors munge source text before we start doing anything too +complicated. + +Each preprocessor implements a "run" method that takes a pointer to a list of lines of the document, +modifies it as necessary and returns either the same pointer or a +pointer to a new list. Preprocessors must extend +markdown.Preprocessor. + +""" + + +class Preprocessor : + pass + + +class HeaderPreprocessor (Preprocessor): + + """ + Replaces underlined headers with hashed headers to avoid + the nead for lookahead later. + """ + + def run (self, lines) : + + i = -1 + while i+1 < len(lines) : + i = i+1 + if not lines[i].strip() : + continue + + if lines[i].startswith("#") : + lines.insert(i+1, "\n") + + if (i+1 <= len(lines) + and lines[i+1] + and lines[i+1][0] in ['-', '=']) : + + underline = lines[i+1].strip() + + if underline == "="*len(underline) : + lines[i] = "# " + lines[i].strip() + lines[i+1] = "" + elif underline == "-"*len(underline) : + lines[i] = "## " + lines[i].strip() + lines[i+1] = "" + + #for l in lines : + # print l.encode('utf8') + #sys.exit(0) + + return lines + +HEADER_PREPROCESSOR = HeaderPreprocessor() + +class LinePreprocessor (Preprocessor): + """Deals with HR lines (needs to be done before processing lists)""" + + def run (self, lines) : + for i in range(len(lines)) : + if self._isLine(lines[i]) : + lines[i] = "
" + return lines + + def _isLine(self, block) : + """Determines if a block should be replaced with an
""" + if block.startswith(" ") : return 0 # a code block + text = "".join([x for x in block if not x.isspace()]) + if len(text) <= 2 : + return 0 + for pattern in ['isline1', 'isline2', 'isline3'] : + m = RE.regExp[pattern].match(text) + if (m and m.group(1)) : + return 1 + else: + return 0 + +LINE_PREPROCESSOR = LinePreprocessor() + + +class LineBreaksPreprocessor (Preprocessor): + """Replaces double spaces at the end of the lines with
.""" + + def run (self, lines) : + for i in range(len(lines)) : + if (lines[i].endswith(" ") + and not RE.regExp['tabbed'].match(lines[i]) ): + lines[i] += "
" + return lines + +LINE_BREAKS_PREPROCESSOR = LineBreaksPreprocessor() + + +class HtmlBlockPreprocessor (Preprocessor): + """Removes html blocks from self.lines""" + + def _get_left_tag(self, block): + return block[1:].replace(">", " ", 1).split()[0].lower() + + + def _get_right_tag(self, left_tag, block): + return block.rstrip()[-len(left_tag)-2:-1].lower() + + def _equal_tags(self, left_tag, right_tag): + if left_tag in ['?', '?php', 'div'] : # handle PHP, etc. + return True + if ("/" + left_tag) == right_tag: + return True + elif left_tag == right_tag[1:] \ + and right_tag[0] != "<": + return True + else: + return False + + def _is_oneliner(self, tag): + return (tag in ['hr', 'hr/']) + + + def run (self, lines) : + new_blocks = [] + text = "\n".join(lines) + text = text.split("\n\n") + + items = [] + left_tag = '' + right_tag = '' + in_tag = False # flag + + for block in text: + if block.startswith("\n") : + block = block[1:] + + if not in_tag: + + if block.startswith("<"): + + left_tag = self._get_left_tag(block) + right_tag = self._get_right_tag(left_tag, block) + + if not (is_block_level(left_tag) \ + or block[1] in ["!", "?", "@", "%"]): + new_blocks.append(block) + continue + + if self._is_oneliner(left_tag): + new_blocks.append(block.strip()) + continue + + if block[1] == "!": + # is a comment block + left_tag = "--" + right_tag = self._get_right_tag(left_tag, block) + # keep checking conditions below and maybe just append + + if block.rstrip().endswith(">") \ + and self._equal_tags(left_tag, right_tag): + new_blocks.append( + self.stash.store(block.strip())) + continue + elif not block[1] == "!": + # if is block level tag and is not complete + items.append(block.strip()) + in_tag = True + continue + + new_blocks.append(block) + + else: + items.append(block.strip()) + + right_tag = self._get_right_tag(left_tag, block) + if self._equal_tags(left_tag, right_tag): + # if find closing tag + in_tag = False + new_blocks.append( + 
self.stash.store('\n\n'.join(items))) + items = [] + + return "\n\n".join(new_blocks).split("\n") + +HTML_BLOCK_PREPROCESSOR = HtmlBlockPreprocessor() + + +class ReferencePreprocessor (Preprocessor): + + def run (self, lines) : + + new_text = []; + for line in lines: + m = RE.regExp['reference-def'].match(line) + if m: + id = m.group(2).strip().lower() + t = m.group(4).strip() # potential title + if not t : + self.references[id] = (m.group(3), t) + elif (len(t) >= 2 + and (t[0] == t[-1] == "\"" + or t[0] == t[-1] == "\'" + or (t[0] == "(" and t[-1] == ")") ) ) : + self.references[id] = (m.group(3), t[1:-1]) + else : + new_text.append(line) + else: + new_text.append(line) + + return new_text #+ "\n" + +REFERENCE_PREPROCESSOR = ReferencePreprocessor() + +""" +====================================================================== +========================== INLINE PATTERNS =========================== +====================================================================== + +Inline patterns such as *emphasis* are handled by means of auxiliary +objects, one per pattern. Pattern objects must be instances of classes +that extend markdown.Pattern. Each pattern object uses a single regular +expression and needs support the following methods: + + pattern.getCompiledRegExp() - returns a regular expression + + pattern.handleMatch(m, doc) - takes a match object and returns + a NanoDom node (as a part of the provided + doc) or None + +All of python markdown's built-in patterns subclass from Patter, +but you can add additional patterns that don't. + +Also note that all the regular expressions used by inline must +capture the whole block. For this reason, they all start with +'^(.*)' and end with '(.*)!'. In case with built-in expression +Pattern takes care of adding the "^(.*)" and "(.*)!". + +Finally, the order in which regular expressions are applied is very +important - e.g. 
if we first replace http://.../ links with tags +and _then_ try to replace inline html, we would end up with a mess. +So, we apply the expressions in the following order: + + * escape and backticks have to go before everything else, so + that we can preempt any markdown patterns by escaping them. + + * then we handle auto-links (must be done before inline html) + + * then we handle inline HTML. At this point we will simply + replace all inline HTML strings with a placeholder and add + the actual HTML to a hash. + + * then inline images (must be done before links) + + * then bracketed links, first regular then reference-style + + * finally we apply strong and emphasis +""" + +NOBRACKET = r'[^\]\[]*' +BRK = ( r'\[(' + + (NOBRACKET + r'(\['+NOBRACKET)*6 + + (NOBRACKET+ r'\])*'+NOBRACKET)*6 + + NOBRACKET + r')\]' ) + +BACKTICK_RE = r'\`([^\`]*)\`' # `e= m*c^2` +DOUBLE_BACKTICK_RE = r'\`\`(.*)\`\`' # ``e=f("`")`` +ESCAPE_RE = r'\\(.)' # \< +EMPHASIS_RE = r'\*([^\*]*)\*' # *emphasis* +STRONG_RE = r'\*\*(.*)\*\*' # **strong** +STRONG_EM_RE = r'\*\*\*([^_]*)\*\*\*' # ***strong*** + +if SMART_EMPHASIS: + EMPHASIS_2_RE = r'(?\)' # [text]() +IMAGE_LINK_RE = r'\!' + BRK + r'\s*\(([^\)]*)\)' # ![alttxt](http://x.com/) +REFERENCE_RE = BRK+ r'\s*\[([^\]]*)\]' # [Google][3] +IMAGE_REFERENCE_RE = r'\!' 
+ BRK + '\s*\[([^\]]*)\]' # ![alt text][2] +NOT_STRONG_RE = r'( \* )' # stand-alone * or _ +AUTOLINK_RE = r'<(http://[^>]*)>' # +AUTOMAIL_RE = r'<([^> \!]*@[^> ]*)>' # +#HTML_RE = r'(\<[^\>]*\>)' # <...> +HTML_RE = r'(\<[a-zA-Z/][^\>]*\>)' # <...> +ENTITY_RE = r'(&[\#a-zA-Z0-9]*;)' # & + +class Pattern: + + def __init__ (self, pattern) : + self.pattern = pattern + self.compiled_re = re.compile("^(.*)%s(.*)$" % pattern, re.DOTALL) + + def getCompiledRegExp (self) : + return self.compiled_re + +BasePattern = Pattern # for backward compatibility + +class SimpleTextPattern (Pattern) : + + def handleMatch(self, m, doc) : + return doc.createTextNode(m.group(2)) + +class SimpleTagPattern (Pattern): + + def __init__ (self, pattern, tag) : + Pattern.__init__(self, pattern) + self.tag = tag + + def handleMatch(self, m, doc) : + el = doc.createElement(self.tag) + el.appendChild(doc.createTextNode(m.group(2))) + return el + +class BacktickPattern (Pattern): + + def __init__ (self, pattern): + Pattern.__init__(self, pattern) + self.tag = "code" + + def handleMatch(self, m, doc) : + el = doc.createElement(self.tag) + text = m.group(2).strip() + #text = text.replace("&", "&") + el.appendChild(doc.createTextNode(text)) + return el + + +class DoubleTagPattern (SimpleTagPattern) : + + def handleMatch(self, m, doc) : + tag1, tag2 = self.tag.split(",") + el1 = doc.createElement(tag1) + el2 = doc.createElement(tag2) + el1.appendChild(el2) + el2.appendChild(doc.createTextNode(m.group(2))) + return el1 + + +class HtmlPattern (Pattern): + + def handleMatch (self, m, doc) : + place_holder = self.stash.store(m.group(2)) + return doc.createTextNode(place_holder) + + +class LinkPattern (Pattern): + + def handleMatch(self, m, doc) : + el = doc.createElement('a') + el.appendChild(doc.createTextNode(m.group(2))) + parts = m.group(9).split() + # We should now have [], [href], or [href, title] + if parts : + el.setAttribute('href', parts[0]) + else : + el.setAttribute('href', "") + if len(parts) > 
1 : + # we also got a title + title = " ".join(parts[1:]).strip() + title = dequote(title) #.replace('"', """) + el.setAttribute('title', title) + return el + + +class ImagePattern (Pattern): + + def handleMatch(self, m, doc): + el = doc.createElement('img') + src_parts = m.group(9).split() + el.setAttribute('src', src_parts[0]) + if len(src_parts) > 1 : + el.setAttribute('title', dequote(" ".join(src_parts[1:]))) + if ENABLE_ATTRIBUTES : + text = doc.createTextNode(m.group(2)) + el.appendChild(text) + text.handleAttributes() + truealt = text.value + el.childNodes.remove(text) + else: + truealt = m.group(2) + el.setAttribute('alt', truealt) + return el + +class ReferencePattern (Pattern): + + def handleMatch(self, m, doc): + if m.group(9) : + id = m.group(9).lower() + else : + # if we got something like "[Google][]" + # we'll use "google" as the id + id = m.group(2).lower() + if not self.references.has_key(id) : # ignore undefined refs + return None + href, title = self.references[id] + text = m.group(2) + return self.makeTag(href, title, text, doc) + + def makeTag(self, href, title, text, doc): + el = doc.createElement('a') + el.setAttribute('href', href) + if title : + el.setAttribute('title', title) + el.appendChild(doc.createTextNode(text)) + return el + + +class ImageReferencePattern (ReferencePattern): + + def makeTag(self, href, title, text, doc): + el = doc.createElement('img') + el.setAttribute('src', href) + if title : + el.setAttribute('title', title) + el.setAttribute('alt', text) + return el + + +class AutolinkPattern (Pattern): + + def handleMatch(self, m, doc): + el = doc.createElement('a') + el.setAttribute('href', m.group(2)) + el.appendChild(doc.createTextNode(m.group(2))) + return el + +class AutomailPattern (Pattern): + + def handleMatch(self, m, doc) : + el = doc.createElement('a') + email = m.group(2) + if email.startswith("mailto:"): + email = email[len("mailto:"):] + for letter in email: + entity = doc.createEntityReference("#%d" % 
ord(letter)) + el.appendChild(entity) + mailto = "mailto:" + email + mailto = "".join(['&#%d;' % ord(letter) for letter in mailto]) + el.setAttribute('href', mailto) + return el + +ESCAPE_PATTERN = SimpleTextPattern(ESCAPE_RE) +NOT_STRONG_PATTERN = SimpleTextPattern(NOT_STRONG_RE) + +BACKTICK_PATTERN = BacktickPattern(BACKTICK_RE) +DOUBLE_BACKTICK_PATTERN = BacktickPattern(DOUBLE_BACKTICK_RE) +STRONG_PATTERN = SimpleTagPattern(STRONG_RE, 'strong') +STRONG_PATTERN_2 = SimpleTagPattern(STRONG_2_RE, 'strong') +EMPHASIS_PATTERN = SimpleTagPattern(EMPHASIS_RE, 'em') +EMPHASIS_PATTERN_2 = SimpleTagPattern(EMPHASIS_2_RE, 'em') + +STRONG_EM_PATTERN = DoubleTagPattern(STRONG_EM_RE, 'strong,em') +STRONG_EM_PATTERN_2 = DoubleTagPattern(STRONG_EM_2_RE, 'strong,em') + +LINK_PATTERN = LinkPattern(LINK_RE) +LINK_ANGLED_PATTERN = LinkPattern(LINK_ANGLED_RE) +IMAGE_LINK_PATTERN = ImagePattern(IMAGE_LINK_RE) +IMAGE_REFERENCE_PATTERN = ImageReferencePattern(IMAGE_REFERENCE_RE) +REFERENCE_PATTERN = ReferencePattern(REFERENCE_RE) + +HTML_PATTERN = HtmlPattern(HTML_RE) +ENTITY_PATTERN = HtmlPattern(ENTITY_RE) + +AUTOLINK_PATTERN = AutolinkPattern(AUTOLINK_RE) +AUTOMAIL_PATTERN = AutomailPattern(AUTOMAIL_RE) + + +""" +====================================================================== +========================== POST-PROCESSORS =========================== +====================================================================== + +Markdown also allows post-processors, which are similar to +preprocessors in that they need to implement a "run" method. Unlike +pre-processors, they take a NanoDom document as a parameter and work +with that. + +Post-Processor should extend markdown.Postprocessor. + +There are currently no standard post-processors, but the footnote +extension below uses one. 
+""" + +class Postprocessor : + pass + + +""" +====================================================================== +========================== MISC AUXILIARY CLASSES ==================== +====================================================================== +""" + +class HtmlStash : + """This class is used for stashing HTML objects that we extract + in the beginning and replace with place-holders.""" + + def __init__ (self) : + self.html_counter = 0 # for counting inline html segments + self.rawHtmlBlocks=[] + + def store(self, html) : + """Saves an HTML segment for later reinsertion. Returns a + placeholder string that needs to be inserted into the + document. + + @param html: an html segment + @returns : a placeholder string """ + self.rawHtmlBlocks.append(html) + placeholder = HTML_PLACEHOLDER % self.html_counter + self.html_counter += 1 + return placeholder + + +class BlockGuru : + + def _findHead(self, lines, fn, allowBlank=0) : + + """Functional magic to help determine boundaries of indented + blocks. 
+ + @param lines: an array of strings + @param fn: a function that returns a substring of a string + if the string matches the necessary criteria + @param allowBlank: specifies whether it's ok to have blank + lines between matching functions + @returns: a list of post processes items and the unused + remainder of the original list""" + + items = [] + item = -1 + + i = 0 # to keep track of where we are + + for line in lines : + + if not line.strip() and not allowBlank: + return items, lines[i:] + + if not line.strip() and allowBlank: + # If we see a blank line, this _might_ be the end + i += 1 + + # Find the next non-blank line + for j in range(i, len(lines)) : + if lines[j].strip() : + next = lines[j] + break + else : + # There is no more text => this is the end + break + + # Check if the next non-blank line is still a part of the list + + part = fn(next) + + if part : + items.append("") + continue + else : + break # found end of the list + + part = fn(line) + + if part : + items.append(part) + i += 1 + continue + else : + return items, lines[i:] + else : + i += 1 + + return items, lines[i:] + + + def detabbed_fn(self, line) : + """ An auxiliary method to be passed to _findHead """ + m = RE.regExp['tabbed'].match(line) + if m: + return m.group(4) + else : + return None + + + def detectTabbed(self, lines) : + + return self._findHead(lines, self.detabbed_fn, + allowBlank = 1) + + +def print_error(string): + """Print an error string to stderr""" + sys.stderr.write(string +'\n') + + +def dequote(string) : + """ Removes quotes from around a string """ + if ( ( string.startswith('"') and string.endswith('"')) + or (string.startswith("'") and string.endswith("'")) ) : + return string[1:-1] + else : + return string + +""" +====================================================================== +========================== CORE MARKDOWN ============================= +====================================================================== + +This stuff is ugly, so if you are 
thinking of extending the syntax, +see first if you can do it via pre-processors, post-processors, +inline patterns or a combination of the three. +""" + +class CorePatterns : + """This class is scheduled for removal as part of a refactoring + effort.""" + + patterns = { + 'header': r'(#*)([^#]*)(#*)', # # A title + 'reference-def' : r'(\ ?\ ?\ ?)\[([^\]]*)\]:\s*([^ ]*)(.*)', + # [Google]: http://www.google.com/ + 'containsline': r'([-]*)$|^([=]*)', # -----, =====, etc. + 'ol': r'[ ]{0,3}[\d]*\.\s+(.*)', # 1. text + 'ul': r'[ ]{0,3}[*+-]\s+(.*)', # "* text" + 'isline1': r'(\**)', # *** + 'isline2': r'(\-*)', # --- + 'isline3': r'(\_*)', # ___ + 'tabbed': r'((\t)|( ))(.*)', # an indented line + 'quoted' : r'> ?(.*)', # a quoted block ("> ...") + } + + def __init__ (self) : + + self.regExp = {} + for key in self.patterns.keys() : + self.regExp[key] = re.compile("^%s$" % self.patterns[key], + re.DOTALL) + + self.regExp['containsline'] = re.compile(r'^([-]*)$|^([=]*)$', re.M) + +RE = CorePatterns() + + +class Markdown: + """ Markdown formatter class for creating an html document from + Markdown text """ + + + def __init__(self, source=None, + extensions=None, + extension_configs=None, + encoding=None, + safe_mode = True): + """Creates a new Markdown instance. + + @param source: The text in Markdown format. + @param encoding: The character encoding of . 
""" + + self.safeMode = safe_mode + self.encoding = encoding + self.source = source + self.blockGuru = BlockGuru() + self.registeredExtensions = [] + self.stripTopLevelTags = 1 + + self.preprocessors = [ HEADER_PREPROCESSOR, + LINE_PREPROCESSOR, + HTML_BLOCK_PREPROCESSOR, + LINE_BREAKS_PREPROCESSOR, + # A footnote preprocessor will + # get inserted here + REFERENCE_PREPROCESSOR ] + + + self.postprocessors = [] # a footnote postprocessor will get + # inserted later + + self.textPostprocessors = [] # a footnote postprocessor will get + # inserted later + + self.prePatterns = [] + + + self.inlinePatterns = [ DOUBLE_BACKTICK_PATTERN, + BACKTICK_PATTERN, + ESCAPE_PATTERN, + IMAGE_LINK_PATTERN, + IMAGE_REFERENCE_PATTERN, + REFERENCE_PATTERN, + LINK_ANGLED_PATTERN, + LINK_PATTERN, + AUTOLINK_PATTERN, + AUTOMAIL_PATTERN, + HTML_PATTERN, + ENTITY_PATTERN, + NOT_STRONG_PATTERN, + STRONG_EM_PATTERN, + STRONG_EM_PATTERN_2, + STRONG_PATTERN, + STRONG_PATTERN_2, + EMPHASIS_PATTERN, + EMPHASIS_PATTERN_2 + # The order of the handlers matters!!! 
+ ] + + self.registerExtensions(extensions = extensions, + configs = extension_configs) + + self.reset() + + + def registerExtensions(self, extensions, configs) : + + if not configs : + configs = {} + + for ext in extensions : + + extension_module_name = "libprs500.ebooks.markdown.mdx_" + ext + try : + module = sys.modules[extension_module_name] + except : + message(CRITICAL, + "couldn't load extension %s (looking for %s module)" + % (ext, extension_module_name) ) + else : + + if configs.has_key(ext) : + configs_for_ext = configs[ext] + else : + configs_for_ext = [] + extension = module.makeExtension(configs_for_ext) + extension.extendMarkdown(self, globals()) + + + + + def registerExtension(self, extension) : + """ This gets called by the extension """ + self.registeredExtensions.append(extension) + + def reset(self) : + """Resets all state variables so that we can start + with a new text.""" + self.references={} + self.htmlStash = HtmlStash() + + HTML_BLOCK_PREPROCESSOR.stash = self.htmlStash + REFERENCE_PREPROCESSOR.references = self.references + HTML_PATTERN.stash = self.htmlStash + ENTITY_PATTERN.stash = self.htmlStash + REFERENCE_PATTERN.references = self.references + IMAGE_REFERENCE_PATTERN.references = self.references + + for extension in self.registeredExtensions : + extension.reset() + + + def _transform(self): + """Transforms the Markdown text into a XHTML body document + + @returns: A NanoDom Document """ + + # Setup the document + + self.doc = Document() + self.top_element = self.doc.createElement("span") + self.top_element.appendChild(self.doc.createTextNode('\n')) + self.top_element.setAttribute('class', 'markdown') + self.doc.appendChild(self.top_element) + + # Fixup the source text + text = self.source.strip() + text = text.replace("\r\n", "\n").replace("\r", "\n") + text += "\n\n" + text = text.expandtabs(TAB_LENGTH) + + # Split into lines and run the preprocessors that will work with + # self.lines + + self.lines = text.split("\n") + + # Run the 
pre-processors on the lines + for prep in self.preprocessors : + self.lines = prep.run(self.lines) + + # Create a NanoDom tree from the lines and attach it to Document + + + buffer = [] + for line in self.lines : + if line.startswith("#") : + self._processSection(self.top_element, buffer) + buffer = [line] + else : + buffer.append(line) + self._processSection(self.top_element, buffer) + + #self._processSection(self.top_element, self.lines) + + # Not sure why I put this in but let's leave it for now. + self.top_element.appendChild(self.doc.createTextNode('\n')) + + # Run the post-processors + for postprocessor in self.postprocessors : + postprocessor.run(self.doc) + + return self.doc + + + def _processSection(self, parent_elem, lines, + inList = 0, looseList = 0) : + + """Process a section of a source document, looking for high + level structural elements like lists, block quotes, code + segments, html blocks, etc. Some those then get stripped + of their high level markup (e.g. get unindented) and the + lower-level markup is processed recursively. + + @param parent_elem: A NanoDom element to which the content + will be added + @param lines: a list of lines + @param inList: a level + @returns: None""" + + if not lines : + return + + # Check if this section starts with a list, a blockquote or + # a code block + + processFn = { 'ul' : self._processUList, + 'ol' : self._processOList, + 'quoted' : self._processQuote, + 'tabbed' : self._processCodeBlock } + + for regexp in ['ul', 'ol', 'quoted', 'tabbed'] : + m = RE.regExp[regexp].match(lines[0]) + if m : + processFn[regexp](parent_elem, lines, inList) + return + + # We are NOT looking at one of the high-level structures like + # lists or blockquotes. So, it's just a regular paragraph + # (though perhaps nested inside a list or something else). If + # we are NOT inside a list, we just need to look for a blank + # line to find the end of the block. 
If we ARE inside a + # list, however, we need to consider that a sublist does not + # need to be separated by a blank line. Rather, the following + # markup is legal: + # + # * The top level list item + # + # Another paragraph of the list. This is where we are now. + # * Underneath we might have a sublist. + # + + if inList : + + start, theRest = self._linesUntil(lines, (lambda line: + RE.regExp['ul'].match(line) + or RE.regExp['ol'].match(line) + or not line.strip())) + + self._processSection(parent_elem, start, + inList - 1, looseList = looseList) + self._processSection(parent_elem, theRest, + inList - 1, looseList = looseList) + + + else : # Ok, so it's just a simple block + + paragraph, theRest = self._linesUntil(lines, lambda line: + not line.strip()) + + if len(paragraph) and paragraph[0].startswith('#') : + m = RE.regExp['header'].match(paragraph[0]) + if m : + level = len(m.group(1)) + h = self.doc.createElement("h%d" % level) + parent_elem.appendChild(h) + for item in self._handleInlineWrapper2(m.group(2).strip()) : + h.appendChild(item) + else : + message(CRITICAL, "We've got a problem header!") + + elif paragraph : + + list = self._handleInlineWrapper2("\n".join(paragraph)) + + if ( parent_elem.nodeName == 'li' + and not (looseList or parent_elem.childNodes)): + + #and not parent_elem.childNodes) : + # If this is the first paragraph inside "li", don't + # put

around it - append the paragraph bits directly + # onto parent_elem + el = parent_elem + else : + # Otherwise make a "p" element + el = self.doc.createElement("p") + parent_elem.appendChild(el) + + for item in list : + el.appendChild(item) + + if theRest : + theRest = theRest[1:] # skip the first (blank) line + + self._processSection(parent_elem, theRest, inList) + + + + def _processUList(self, parent_elem, lines, inList) : + self._processList(parent_elem, lines, inList, + listexpr='ul', tag = 'ul') + + def _processOList(self, parent_elem, lines, inList) : + self._processList(parent_elem, lines, inList, + listexpr='ol', tag = 'ol') + + + def _processList(self, parent_elem, lines, inList, listexpr, tag) : + """Given a list of document lines starting with a list item, + finds the end of the list, breaks it up, and recursively + processes each list item and the remainder of the text file. + + @param parent_elem: A dom element to which the content will be added + @param lines: a list of lines + @param inList: a level + @returns: None""" + + ul = self.doc.createElement(tag) # ul might actually be '

    ' + parent_elem.appendChild(ul) + + looseList = 0 + + # Make a list of list items + items = [] + item = -1 + + i = 0 # a counter to keep track of where we are + + for line in lines : + + loose = 0 + if not line.strip() : + # If we see a blank line, this _might_ be the end of the list + i += 1 + loose = 1 + + # Find the next non-blank line + for j in range(i, len(lines)) : + if lines[j].strip() : + next = lines[j] + break + else : + # There is no more text => end of the list + break + + # Check if the next non-blank line is still a part of the list + if ( RE.regExp['ul'].match(next) or + RE.regExp['ol'].match(next) or + RE.regExp['tabbed'].match(next) ): + # get rid of any white space in the line + items[item].append(line.strip()) + looseList = loose or looseList + continue + else : + break # found end of the list + + # Now we need to detect list items (at the current level) + # while also detabing child elements if necessary + + for expr in ['ul', 'ol', 'tabbed']: + + m = RE.regExp[expr].match(line) + if m : + if expr in ['ul', 'ol'] : # We are looking at a new item + if m.group(1) : + items.append([m.group(1)]) + item += 1 + elif expr == 'tabbed' : # This line needs to be detabbed + items[item].append(m.group(4)) #after the 'tab' + + i += 1 + break + else : + items[item].append(line) # Just regular continuation + i += 1 # added on 2006.02.25 + else : + i += 1 + + # Add the dom elements + for item in items : + li = self.doc.createElement("li") + ul.appendChild(li) + + self._processSection(li, item, inList + 1, looseList = looseList) + + # Process the remaining part of the section + + self._processSection(parent_elem, lines[i:], inList) + + + def _linesUntil(self, lines, condition) : + """ A utility function to break a list of lines upon the + first line that satisfied a condition. The condition + argument should be a predicate function. 
+ """ + + i = -1 + for line in lines : + i += 1 + if condition(line) : break + else : + i += 1 + return lines[:i], lines[i:] + + def _processQuote(self, parent_elem, lines, inList) : + """Given a list of document lines starting with a quote finds + the end of the quote, unindents it and recursively + processes the body of the quote and the remainder of the + text file. + + @param parent_elem: DOM element to which the content will be added + @param lines: a list of lines + @param inList: a level + @returns: None """ + + dequoted = [] + i = 0 + for line in lines : + m = RE.regExp['quoted'].match(line) + if m : + dequoted.append(m.group(1)) + i += 1 + else : + break + else : + i += 1 + + blockquote = self.doc.createElement('blockquote') + parent_elem.appendChild(blockquote) + + self._processSection(blockquote, dequoted, inList) + self._processSection(parent_elem, lines[i:], inList) + + + + + def _processCodeBlock(self, parent_elem, lines, inList) : + """Given a list of document lines starting with a code block + finds the end of the block, puts it into the dom verbatim + wrapped in ("
    ") and recursively processes the
    +           the remainder of the text file.
    +
    +           @param parent_elem: DOM element to which the content will be added
    +           @param lines: a list of lines
    +           @param inList: a level
    +           @returns: None"""
    +
    +        detabbed, theRest = self.blockGuru.detectTabbed(lines)
    +
    +        pre = self.doc.createElement('pre')
    +        #code = self.doc.createElement('code')
    +        parent_elem.appendChild(pre)
    +        #pre.appendChild(code)
    +        text = "\n".join(detabbed).rstrip()+"\n"
    +        #text = text.replace("&", "&")
    +        pre.appendChild(self.doc.createTextNode(text))
    +        self._processSection(parent_elem, theRest, inList)
    +
    +
    +    def _handleInlineWrapper2 (self, line) :
    +        """Apply every inline pattern to a line, splicing match results
    +        back into a work list until no string fragment matches.
    +
    +        Each pattern in self.inlinePatterns is applied exhaustively
    +        before moving on to the next, so the list order of the
    +        patterns defines their precedence (see the "order of the
    +        handlers matters" note where inlinePatterns is built).
    +
    +        @param line: a block of Markdown text (a string)
    +        @return: a list of NanoDom nodes"""
    +
    +
    +        parts = [line]
    +
    +        #if not(line):
    +        #    return [self.doc.createTextNode(' ')]
    +
    +        for pattern in self.inlinePatterns :
    +
    +            #print
    +            #print self.inlinePatterns.index(pattern)
    +
    +            i = 0
    +
    +            #print parts
    +            # Walk the work list; a successful match splits a string
    +            # fragment in place, so len(parts) can grow as we go.
    +            while i < len(parts) :
    +                
    +                x = parts[i]
    +                #print i
    +                if isinstance(x, (str, unicode)) :
    +                    result = self._applyPattern(x, pattern)
    +                    #print result
    +                    #print result
    +                    #print parts, i
    +                    if result :
    +                        # _applyPattern returns its fragments in
    +                        # reverse order; inserting each one at i+1
    +                        # restores their left-to-right order.
    +                        i -= 1
    +                        parts.remove(x)
    +                        for y in result :
    +                            parts.insert(i+1,y)
    +                
    +                i += 1
    +
    +        # Any strings that survived all patterns become plain text nodes.
    +        for i in range(len(parts)) :
    +            x = parts[i]
    +            if isinstance(x, (str, unicode)) :
    +                parts[i] = self.doc.createTextNode(x)
    +
    +        return parts
    +        
    +
    +
    +    def _handleInlineWrapper (self, line) :
    +        """Iteratively expand a line via _handleInline until the work
    +        list contains only NanoDom nodes (avoids recursion).
    +
    +        @param line: a block of Markdown text (a string)
    +        @return: a list of NanoDom nodes"""
    +
    +        # A wrapper around _handleInline to avoid recursion
    +
    +        parts = [line]
    +
    +        i = 0
    +        
    +        while i < len(parts) :
    +            x = parts[i]
    +            if isinstance(x, (str, unicode)) :
    +                # Replace the string with its expansion; i is NOT
    +                # advanced, so the newly inserted items are examined
    +                # again on the next iteration.
    +                parts.remove(x)
    +                result = self._handleInline(x)
    +                for y in result :
    +                    parts.insert(i,y)
    +            else :
    +                i += 1
    +
    +        return parts
    +
    +    def _handleInline(self,  line):
    +        """Transform a Markdown line with inline elements to an XHTML
    +        fragment.
    +
    +        This function uses auxiliary objects called inline patterns.
    +        See notes on inline patterns above.
    +
    +        @param line: A block of Markdown text (a string)
    +        @return: A list of NanoDom nodes """
    +
    +        # An empty line still yields one text node so callers always
    +        # have something to append.
    +        if not(line):
    +            return [self.doc.createTextNode(' ')]
    +
    +        # First pattern that matches wins; list order of
    +        # self.inlinePatterns therefore defines precedence.
    +        for pattern in self.inlinePatterns :
    +            list = self._applyPattern( line, pattern)
    +            if list: return list
    +
    +        return [self.doc.createTextNode(line)]
    +
    +    def _applyPattern(self, line, pattern) :
    +        """ Given a pattern name, this function checks if the line
    +        fits the pattern, creates the necessary elements, and returns
    +        back a list consisting of NanoDom elements and/or strings.
    +        
    +        @param line: the text to be processed
    +        @param pattern: the pattern to be checked
    +
    +        @returns: the appropriate newly created NanoDom element if the
    +                  pattern matches, None otherwise.
    +        """
    +
    +        # match the line to pattern's pre-compiled reg exp.
    +        # if no match, move on.
    +        # (The compiled form is "^(.*)PATTERN(.*)$" with DOTALL, so
    +        # group(1) is the text LEFT of the match and the last group
    +        # is the text RIGHT of it.)
    +
    +        m = pattern.getCompiledRegExp().match(line)
    +        if not m :
    +            return None
    +
    +        # if we got a match let the pattern make us a NanoDom node
    +        # if it doesn't, move on
    +        node = pattern.handleMatch(m, self.doc)
    +
    +        if node :
    +            # Those are in the reverse order! The caller inserts each
    +            # item at the same index, which restores the natural
    +            # (left, node, right) order.
    +            return ( m.groups()[-1], # the text to the RIGHT of the match
    +                     node,           # the new node
    +                     m.group(1))     # the text to the LEFT of the match
    +
    +        else :
    +            return None
    +
    +    def __str__(self, source = None):
    +        """Return the document in XHTML format.
    +
    +        @returns: A serialized XHTML body."""
    +        #try :
    +
    +        if source :
    +            self.source = source
    +        
    +        doc = self._transform()
    +        xml = doc.toxml()
    +
    +        #finally:
    +        #    doc.unlink()
    +
    +        # Let's stick in all the raw html pieces
    +
    +        for i in range(self.htmlStash.html_counter) :
    +            html = self.htmlStash.rawHtmlBlocks[i]
    +            if self.safeMode :
    +                html = "[HTML_REMOVED]"
    +                
    +            xml = xml.replace("

    %s\n

    " % (HTML_PLACEHOLDER % i), + html + "\n") + xml = xml.replace(HTML_PLACEHOLDER % i, + html) + + # And return everything but the top level tag + + if self.stripTopLevelTags : + xml = xml.strip()[23:-7] + "\n" + + for pp in self.textPostprocessors : + xml = pp.run(xml) + + return xml + + + toString = __str__ + + + def __unicode__(self): + """Return the document in XHTML format as a Unicode object. + """ + return str(self)#.decode(self.encoding) + + + toUnicode = __unicode__ + + + + +# ==================================================================== + +def markdownFromFile(input = None, + output = None, + extensions = [], + encoding = None, + message_threshold = CRITICAL, + safe = False) : + + global MESSAGE_THRESHOLD + MESSAGE_THRESHOLD = message_threshold + + message(VERBOSE, "input file: %s" % input) + + + if not encoding : + encoding = "utf-8" + + input_file = codecs.open(input, mode="r", encoding="utf-8") + text = input_file.read() + input_file.close() + + new_text = markdown(text, extensions, encoding, safe_mode = safe) + + if output : + output_file = codecs.open(output, "w", encoding=encoding) + output_file.write(new_text) + output_file.close() + + else : + sys.stdout.write(new_text.encode(encoding)) + +def markdown(text, + extensions = [], + encoding = None, + safe_mode = False) : + + message(VERBOSE, "in markdown.markdown(), received text:\n%s" % text) + + extension_names = [] + extension_configs = {} + + for ext in extensions : + pos = ext.find("(") + if pos == -1 : + extension_names.append(ext) + else : + name = ext[:pos] + extension_names.append(name) + pairs = [x.split("=") for x in ext[pos+1:-1].split(",")] + configs = [(x.strip(), y.strip()) for (x, y) in pairs] + extension_configs[name] = configs + #print configs + + md = Markdown(text, extensions=extension_names, + extension_configs=extension_configs, + safe_mode = safe_mode) + + return md.toString() + + +class Extension : + + def __init__(self, configs={}) : + self.config = configs + + def 
getConfig(self, key) : + if self.config.has_key(key) : + #print self.config[key][0] + return self.config[key][0] + else : + return "" + + def getConfigInfo(self) : + return [(key, self.config[key][1]) for key in self.config.keys()] + + def setConfig(self, key, value) : + self.config[key][0] = value + + +OPTPARSE_WARNING = """ +Python 2.3 or higher required for advanced command line options. +For lower versions of Python use: + + %s INPUT_FILE > OUTPUT_FILE + +""" % EXECUTABLE_NAME_FOR_USAGE + +def parse_options() : + + try : + optparse = __import__("optparse") + except : + if len(sys.argv) == 2 : + return {'input' : sys.argv[1], + 'output' : None, + 'message_threshold' : CRITICAL, + 'safe' : False, + 'extensions' : [], + 'encoding' : None } + + else : + print OPTPARSE_WARNING + return None + + parser = optparse.OptionParser(usage="%prog INPUTFILE [options]") + + parser.add_option("-f", "--file", dest="filename", + help="write output to OUTPUT_FILE", + metavar="OUTPUT_FILE") + parser.add_option("-e", "--encoding", dest="encoding", + help="encoding for input and output files",) + parser.add_option("-q", "--quiet", default = CRITICAL, + action="store_const", const=NONE, dest="verbose", + help="suppress all messages") + parser.add_option("-v", "--verbose", + action="store_const", const=INFO, dest="verbose", + help="print info messages") + parser.add_option("-s", "--safe", + action="store_const", const=True, dest="safe", + help="same mode (strip user's HTML tag)") + + parser.add_option("--noisy", + action="store_const", const=VERBOSE, dest="verbose", + help="print debug messages") + parser.add_option("-x", "--extension", action="append", dest="extensions", + help = "load extension EXTENSION", metavar="EXTENSION") + + (options, args) = parser.parse_args() + + if not len(args) == 1 : + parser.print_help() + return None + else : + input_file = args[0] + + if not options.extensions : + options.extensions = [] + + return {'input' : input_file, + 'output' : options.filename, 
+ 'message_threshold' : options.verbose, + 'safe' : options.safe, + 'extensions' : options.extensions, + 'encoding' : options.encoding } + +def main(): + """ Run Markdown from the command line. """ + for a in ['-x', 'toc', '-x', 'tables', '-x', 'footnotes']: + sys.argv.append(a) + + options = parse_options() + + #if os.access(inFile, os.R_OK): + + if not options : + sys.exit(0) + + markdownFromFile(**options) + +if __name__ == '__main__': + main() + + + + + + + + diff --git a/src/libprs500/ebooks/markdown/mdx_footnotes.py b/src/libprs500/ebooks/markdown/mdx_footnotes.py new file mode 100644 index 0000000000..701da46823 --- /dev/null +++ b/src/libprs500/ebooks/markdown/mdx_footnotes.py @@ -0,0 +1,255 @@ +""" +## To see this file as plain text go to +## http://freewisdom.org/projects/python-markdown/mdx_footnotes.raw_content + +========================= FOOTNOTES ================================= + +This section adds footnote handling to markdown. It can be used as +an example for extending python-markdown with relatively complex +functionality. While in this case the extension is included inside +the module itself, it could just as easily be added from outside the +module. Not that all markdown classes above are ignorant about +footnotes. All footnote functionality is provided separately and +then added to the markdown instance at the run time. + +Footnote functionality is attached by calling extendMarkdown() +method of FootnoteExtension. The method also registers the +extension to allow it's state to be reset by a call to reset() +method. 
+""" + +FN_BACKLINK_TEXT = "zz1337820767766393qq" + + +import re, markdown, random + +class FootnoteExtension (markdown.Extension): + + DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)') + SHORT_USE_RE = re.compile(r'\[\^([^\]]*)\]', re.M) # [^a] + + def __init__ (self, configs) : + + self.config = {'PLACE_MARKER' : + ["///Footnotes Go Here///", + "The text string that marks where the footnotes go"]} + + for key, value in configs : + self.config[key][0] = value + + self.reset() + + def extendMarkdown(self, md, md_globals) : + + self.md = md + + # Stateless extensions do not need to be registered + md.registerExtension(self) + + # Insert a preprocessor before ReferencePreprocessor + index = md.preprocessors.index(md_globals['REFERENCE_PREPROCESSOR']) + preprocessor = FootnotePreprocessor(self) + preprocessor.md = md + md.preprocessors.insert(index, preprocessor) + + # Insert an inline pattern before ImageReferencePattern + FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah + index = md.inlinePatterns.index(md_globals['IMAGE_REFERENCE_PATTERN']) + md.inlinePatterns.insert(index, FootnotePattern(FOOTNOTE_RE, self)) + + # Insert a post-processor that would actually add the footnote div + postprocessor = FootnotePostprocessor(self) + postprocessor.extension = self + + md.postprocessors.append(postprocessor) + + textPostprocessor = FootnoteTextPostprocessor(self) + + md.textPostprocessors.append(textPostprocessor) + + + def reset(self) : + # May be called by Markdown is state reset is desired + + self.footnote_suffix = "-" + str(int(random.random()*1000000000)) + self.used_footnotes={} + self.footnotes = {} + + def findFootnotesPlaceholder(self, doc) : + def findFootnotePlaceholderFn(node=None, indent=0): + if node.type == 'text': + if node.value.find(self.getConfig("PLACE_MARKER")) > -1 : + return True + + fn_div_list = doc.find(findFootnotePlaceholderFn) + if fn_div_list : + return fn_div_list[0] + + + def setFootnote(self, id, text) : + self.footnotes[id] = 
text + + def makeFootnoteId(self, num) : + return 'fn%d%s' % (num, self.footnote_suffix) + + def makeFootnoteRefId(self, num) : + return 'fnr%d%s' % (num, self.footnote_suffix) + + def makeFootnotesDiv (self, doc) : + """Creates the div with class='footnote' and populates it with + the text of the footnotes. + + @returns: the footnote div as a dom element """ + + if not self.footnotes.keys() : + return None + + div = doc.createElement("div") + div.setAttribute('class', 'footnote') + hr = doc.createElement("hr") + div.appendChild(hr) + ol = doc.createElement("ol") + div.appendChild(ol) + + footnotes = [(self.used_footnotes[id], id) + for id in self.footnotes.keys()] + footnotes.sort() + + for i, id in footnotes : + li = doc.createElement('li') + li.setAttribute('id', self.makeFootnoteId(i)) + + self.md._processSection(li, self.footnotes[id].split("\n")) + + #li.appendChild(doc.createTextNode(self.footnotes[id])) + + backlink = doc.createElement('a') + backlink.setAttribute('href', '#' + self.makeFootnoteRefId(i)) + backlink.setAttribute('class', 'footnoteBackLink') + backlink.setAttribute('title', + 'Jump back to footnote %d in the text' % 1) + backlink.appendChild(doc.createTextNode(FN_BACKLINK_TEXT)) + + if li.childNodes : + node = li.childNodes[-1] + if node.type == "text" : + node = li + node.appendChild(backlink) + + ol.appendChild(li) + + return div + + +class FootnotePreprocessor : + + def __init__ (self, footnotes) : + self.footnotes = footnotes + + def run(self, lines) : + + self.blockGuru = markdown.BlockGuru() + lines = self._handleFootnoteDefinitions (lines) + + # Make a hash of all footnote marks in the text so that we + # know in what order they are supposed to appear. (This + # function call doesn't really substitute anything - it's just + # a way to get a callback for each occurence. 
+ + text = "\n".join(lines) + self.footnotes.SHORT_USE_RE.sub(self.recordFootnoteUse, text) + + return text.split("\n") + + + def recordFootnoteUse(self, match) : + + id = match.group(1) + id = id.strip() + nextNum = len(self.footnotes.used_footnotes.keys()) + 1 + self.footnotes.used_footnotes[id] = nextNum + + + def _handleFootnoteDefinitions(self, lines) : + """Recursively finds all footnote definitions in the lines. + + @param lines: a list of lines of text + @returns: a string representing the text with footnote + definitions removed """ + + i, id, footnote = self._findFootnoteDefinition(lines) + + if id : + + plain = lines[:i] + + detabbed, theRest = self.blockGuru.detectTabbed(lines[i+1:]) + + self.footnotes.setFootnote(id, + footnote + "\n" + + "\n".join(detabbed)) + + more_plain = self._handleFootnoteDefinitions(theRest) + return plain + [""] + more_plain + + else : + return lines + + def _findFootnoteDefinition(self, lines) : + """Finds the first line of a footnote definition. + + @param lines: a list of lines of text + @returns: the index of the line containing a footnote definition """ + + counter = 0 + for line in lines : + m = self.footnotes.DEF_RE.match(line) + if m : + return counter, m.group(2), m.group(3) + counter += 1 + return counter, None, None + + +class FootnotePattern (markdown.Pattern) : + + def __init__ (self, pattern, footnotes) : + + markdown.Pattern.__init__(self, pattern) + self.footnotes = footnotes + + def handleMatch(self, m, doc) : + sup = doc.createElement('sup') + a = doc.createElement('a') + sup.appendChild(a) + id = m.group(2) + num = self.footnotes.used_footnotes[id] + sup.setAttribute('id', self.footnotes.makeFootnoteRefId(num)) + a.setAttribute('href', '#' + self.footnotes.makeFootnoteId(num)) + a.appendChild(doc.createTextNode(str(num))) + return sup + +class FootnotePostprocessor (markdown.Postprocessor): + + def __init__ (self, footnotes) : + self.footnotes = footnotes + + def run(self, doc) : + footnotesDiv = 
self.footnotes.makeFootnotesDiv(doc) + if footnotesDiv : + fnPlaceholder = self.extension.findFootnotesPlaceholder(doc) + if fnPlaceholder : + fnPlaceholder.parent.replaceChild(fnPlaceholder, footnotesDiv) + else : + doc.documentElement.appendChild(footnotesDiv) + +class FootnoteTextPostprocessor (markdown.Postprocessor): + + def __init__ (self, footnotes) : + self.footnotes = footnotes + + def run(self, text) : + return text.replace(FN_BACKLINK_TEXT, "↩") + +def makeExtension(configs=None) : + return FootnoteExtension(configs=configs) + diff --git a/src/libprs500/ebooks/markdown/mdx_tables.py b/src/libprs500/ebooks/markdown/mdx_tables.py new file mode 100644 index 0000000000..c5c84a4adf --- /dev/null +++ b/src/libprs500/ebooks/markdown/mdx_tables.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python + +""" +Table extension for Python-Markdown +""" + +import markdown + + +class TablePattern(markdown.Pattern) : + def __init__ (self, md): + markdown.Pattern.__init__(self, r'^\|([^\n]*)\|(\n|$)') + self.md = md + + def handleMatch(self, m, doc) : + # a single line represents a row + tr = doc.createElement('tr') + tr.appendChild(doc.createTextNode('\n')) + # chunks between pipes represent cells + for t in m.group(2).split('|'): + if len(t) >= 2 and t.startswith('*') and t.endswith('*'): + # if a cell is bounded by asterisks, it is a + td = doc.createElement('th') + t = t[1:-1] + else: + # otherwise it is a + td = doc.createElement('td') + # apply inline patterns on chunks + for n in self.md._handleInline(t): + if(type(n) == unicode): + td.appendChild(doc.createTextNode(n)) + else: + td.appendChild(n) + tr.appendChild(td) + # very long lines are evil + tr.appendChild(doc.createTextNode('\n')) + return tr + + +class TablePostprocessor: + def run(self, doc): + # markdown wrapped our s in a

    , we fix that here + def test_for_p(element): + return element.type == 'element' and element.nodeName == 'p' + # replace "p > tr" with "table > tr" + for element in doc.find(test_for_p): + for node in element.childNodes: + if(node.type == 'text' and node.value.strip() == ''): + # skip leading whitespace + continue + if (node.type == 'element' and node.nodeName == 'tr'): + element.nodeName = 'table' + break + + +class TableExtension(markdown.Extension): + def extendMarkdown(self, md, md_globals): + md.inlinePatterns.insert(0, TablePattern(md)) + md.postprocessors.append(TablePostprocessor()) + + +def makeExtension(configs): + return TableExtension(configs) + + diff --git a/src/libprs500/ebooks/markdown/mdx_toc.py b/src/libprs500/ebooks/markdown/mdx_toc.py new file mode 100644 index 0000000000..66a34d90a0 --- /dev/null +++ b/src/libprs500/ebooks/markdown/mdx_toc.py @@ -0,0 +1,165 @@ +## To access this file as plain text go to +## http://freewisdom.org/projects/python-markdown/mdx_toc.raw_content + +""" +Chris Clark - clach04 -at- sf.net + +My markdown extensions for adding: + Table of Contents (aka toc) +""" + +import os +import sys +import re +import markdown + +DEFAULT_TITLE = None + +def extract_alphanumeric(in_str=None): + """take alpha-numeric (7bit ascii) and return as a string + """ + # I'm sure this is really inefficient and + # could be done with a lambda/map() + #x.strip().title().replace(' ', "") + out_str=[] + for x in in_str.title(): + if x.isalnum(): out_str.append(x) + return ''.join(out_str) + +class TitlePostprocessor (markdown.Postprocessor): + + def __init__ (self, extension) : + self.extension = extension + + def run(self, doc) : + titleElement = self.extension.createTitle(doc) + if titleElement : + doc.documentElement.insertChild(0, titleElement) + + +class TocExtension (markdown.Extension): + """Markdown extension: generate a Table Of Contents (aka toc) + toc is returned in a div tag with class='toc' + toc is either: + appended to end of 
document + OR + replaces first string occurence of "///Table of Contents Goes Here///" + """ + + def __init__ (self) : + #maybe add these as parameters to the class init? + self.TOC_INCLUDE_MARKER = "///Table of Contents///" + self.TOC_TITLE = "Table Of Contents" + self.auto_toc_heading_type=2 + self.toc_heading_type=3 + + + def extendMarkdown(self, md, md_globals) : + # Just insert in the end + md.postprocessors.append(TocPostprocessor(self)) + # Stateless extensions do not need to be registered, so we don't + # register. + + def findTocPlaceholder(self, doc) : + def findTocPlaceholderFn(node=None, indent=0): + if node.type == 'text': + if node.value.find(self.TOC_INCLUDE_MARKER) > -1 : + return True + + toc_div_list = doc.find(findTocPlaceholderFn) + if toc_div_list : + return toc_div_list[0] + + + def createTocDiv(self, doc) : + """ + Creates Table Of Contents based on headers. + + @returns: toc as a single as a dom element + in a

    tag with class='toc' + """ + + # Find headers + headers_compiled_re = re.compile("h[123456]", re.IGNORECASE) + def findHeadersFn(element=None): + if element.type=='element': + if headers_compiled_re.match(element.nodeName): + return True + + headers_doc_list = doc.find(findHeadersFn) + + # Insert anchor tags into dom + generated_anchor_id=0 + headers_list=[] + min_header_size_found = 6 + for element in headers_doc_list: + heading_title = element.childNodes[0].value + if heading_title.strip() !="": + heading_type = int(element.nodeName[-1:]) + if heading_type == self.auto_toc_heading_type: + min_header_size_found=min(min_header_size_found, + heading_type) + + html_anchor_name= (extract_alphanumeric(heading_title) + +'__MD_autoTOC_%d' % (generated_anchor_id)) + + # insert anchor tag inside header tags + html_anchor = doc.createElement("a") + html_anchor.setAttribute('name', html_anchor_name) + element.appendChild(html_anchor) + + headers_list.append( (heading_type, heading_title, + html_anchor_name) ) + generated_anchor_id = generated_anchor_id + 1 + + # create dom for toc + if headers_list != []: + # Create list + toc_doc_list = doc.createElement("ul") + for (heading_type, heading_title, html_anchor_name) in headers_list: + if heading_type == self.auto_toc_heading_type: + toc_doc_entry = doc.createElement("li") + toc_doc_link = doc.createElement("a") + toc_doc_link.setAttribute('href', '#'+html_anchor_name) + toc_doc_text = doc.createTextNode(heading_title) + toc_doc_link.appendChild(toc_doc_text) + toc_doc_entry.appendChild(toc_doc_link) + toc_doc_list.appendChild(toc_doc_entry) + + + # Put list into div + div = doc.createElement("div") + div.setAttribute('class', 'toc') + if self.TOC_TITLE: + toc_header = doc.createElement("h%d"%(self.toc_heading_type) ) + toc_header_text = doc.createTextNode(self.TOC_TITLE) + toc_header.appendChild(toc_header_text) + div.appendChild(toc_header) + div.appendChild(toc_doc_list) + #hr = doc.createElement("hr") + 
#div.appendChild(hr) + + return div + + +class TocPostprocessor (markdown.Postprocessor): + + def __init__ (self, toc) : + self.toc = toc + + def run(self, doc): + tocPlaceholder = self.toc.findTocPlaceholder(doc) + + tocDiv = self.toc.createTocDiv(doc) + if tocDiv: + if tocPlaceholder : + # Replace "magic" pattern with toc + tocPlaceholder.parent.replaceChild(tocPlaceholder, tocDiv) + else : + # Dump at the end of the DOM + # Probably want to use CSS to position div + doc.documentElement.appendChild(tocDiv) + + +def makeExtension(configs=None) : + return TocExtension() diff --git a/src/libprs500/ptempfile.py b/src/libprs500/ptempfile.py index 1da4441a30..433a7e0afa 100644 --- a/src/libprs500/ptempfile.py +++ b/src/libprs500/ptempfile.py @@ -47,7 +47,7 @@ class _TemporaryFileWrapper(object): os.remove(self.name) -def PersistentTemporaryFile(suffix="", prefix=""): +def PersistentTemporaryFile(suffix="", prefix="", dir=None): """ Return a temporary file that is available even after being closed on all platforms. It is automatically deleted when this object is deleted. 
@@ -55,6 +55,7 @@ def PersistentTemporaryFile(suffix="", prefix=""): """ if prefix == None: prefix = "" - fd, name = tempfile.mkstemp(suffix, "libprs500_"+ __version__+"_" + prefix) + fd, name = tempfile.mkstemp(suffix, "libprs500_"+ __version__+"_" + prefix, + dir=dir) _file = os.fdopen(fd, "wb") return _TemporaryFileWrapper(_file, name) diff --git a/upload.py b/upload.py index 2f6ad6c485..e82bc2c534 100644 --- a/upload.py +++ b/upload.py @@ -9,6 +9,7 @@ PREFIX = "/var/www/vhosts/kovidgoyal.net/subdomains/libprs500" DOWNLOADS = PREFIX+"/httpdocs/downloads" DOCS = PREFIX+"/httpdocs/apidocs" HTML2LRF = "src/libprs500/ebooks/lrf/html/demo" +TXT2LRF = "src/libprs500/ebooks/lrf/txt/demo" check_call = partial(_check_call, shell=True) h = Host(hostType=VIX_SERVICEPROVIDER_VMWARE_WORKSTATION) @@ -19,7 +20,7 @@ def build_windows(): - vm = h.openVM('/mnt/extra/vmware/Windows Vista/Windows Vista.vmx') + vm = h.openVM('/mnt/backup/vmware/Windows Vista/Windows Vista.vmx') vm.powerOn() if not vm.waitForToolsInGuest(): print >>sys.stderr, 'Windows is not booting up' @@ -43,7 +44,7 @@ def build_osx(): if os.path.exists('dist/dmgdone'): os.unlink('dist/dmgdone') - vm = h.openVM('/mnt/extra/vmware/Mac OSX/Mac OSX.vmx') + vm = h.openVM('/mnt/backup/vmware/Mac OSX/Mac OSX.vmx') vm.powerOn() c = 25 * 60 print 'Waiting (minutes):', @@ -69,6 +70,8 @@ def upload_demo(): f.close() check_call('''html2lrf --title='Demonstration of html2lrf' --author='Kovid Goyal' --header --output=/tmp/html2lrf.lrf %s/demo.html'''%(HTML2LRF,)) check_call('''scp /tmp/html2lrf.lrf castalia:%s/'''%(DOWNLOADS,)) + check_call('''txt2lrf -t 'Demonstration of txt2lrf' -a 'Kovid Goyal' --header -o /tmp/txt2lrf.lrf %s/demo.txt'''%(TXT2LRF,) ) + check_call('''scp /tmp/txt2lrf.lrf castalia:%s/'''%(DOWNLOADS,)) def upload_installers(exe, dmg): check_call('''ssh castalia rm -f %s/libprs500\*.exe'''%(DOWNLOADS,)) @@ -105,4 +108,4 @@ def main(): check_call('''rm -rf dist/* build/*''') if __name__ == '__main__': - main() \ 
No newline at end of file + main()