Implement support for markdown in txt2lrf. Fix handling of <pre>, <th> elements in html2lrf

This commit is contained in:
Kovid Goyal 2007-06-19 15:13:52 +00:00
parent a082131823
commit 9178ddf18e
13 changed files with 2277 additions and 105 deletions

View File

@ -26,6 +26,7 @@ entry_points = {
'rtf-meta = libprs500.ebooks.metadata.rtf:main', \ 'rtf-meta = libprs500.ebooks.metadata.rtf:main', \
'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main', \ 'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main', \
'html2lrf = libprs500.ebooks.lrf.html.convert_from:main',\ 'html2lrf = libprs500.ebooks.lrf.html.convert_from:main',\
'markdown = libprs500.ebooks.markdown.markdown:main',\
], ],
'gui_scripts' : [ APPNAME+' = libprs500.gui.main:main'] 'gui_scripts' : [ APPNAME+' = libprs500.gui.main:main']
} }

View File

@ -13,7 +13,7 @@
## with this program; if not, write to the Free Software Foundation, Inc., ## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
''' E-book management software''' ''' E-book management software'''
__version__ = "0.3.52" __version__ = "0.3.53"
__docformat__ = "epytext" __docformat__ = "epytext"
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
__appname__ = 'libprs500' __appname__ = 'libprs500'

View File

@ -73,6 +73,16 @@ def option_parser(usage):
profiles=['prs500'] profiles=['prs500']
parser.add_option('-o', '--output', action='store', default=None, \ parser.add_option('-o', '--output', action='store', default=None, \
help='Output file name. Default is derived from input filename') help='Output file name. Default is derived from input filename')
laf = parser.add_option_group('LOOK AND FEEL')
laf.add_option('--cover', action='store', dest='cover', default=None, \
help='Path to file containing image to be used as cover')
laf.add_option('--font-delta', action='store', type='float', default=0., \
help="""Increase the font size by 2 * FONT_DELTA pts and """
'''the line spacing by FONT_DELTA pts. FONT_DELTA can be a fraction.'''
"""If FONT_DELTA is negative, the font size is decreased.""",
dest='font_delta')
laf.add_option('--disable-autorotation', action='store_true', default=False,
help='Disable autorotation of images.', dest='disable_autorotation')
page = parser.add_option_group('PAGE OPTIONS') page = parser.add_option_group('PAGE OPTIONS')
page.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice', page.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice',
choices=profiles, action='callback', callback=profile_from_string, choices=profiles, action='callback', callback=profile_from_string,

View File

@ -221,6 +221,7 @@ class HTMLConverter(object):
chapter_regex=re.compile('chapter|book|appendix', re.IGNORECASE), chapter_regex=re.compile('chapter|book|appendix', re.IGNORECASE),
link_exclude=re.compile('$'), link_exclude=re.compile('$'),
page_break=re.compile('h[12]', re.IGNORECASE), page_break=re.compile('h[12]', re.IGNORECASE),
force_page_break=re.compile('$', re.IGNORECASE),
profile=PRS500_PROFILE, profile=PRS500_PROFILE,
disable_autorotation=False): disable_autorotation=False):
''' '''
@ -273,7 +274,8 @@ class HTMLConverter(object):
small = {'font-size' :'small'}, small = {'font-size' :'small'},
pre = {'font-family' :'monospace' }, pre = {'font-family' :'monospace' },
tt = {'font-family' :'monospace'}, tt = {'font-family' :'monospace'},
center = {'text-align' : 'center'} center = {'text-align' : 'center'},
th = {'font-size':'large', 'font-weight':'bold'},
) )
self.profile = profile #: Defines the geometry of the display device self.profile = profile #: Defines the geometry of the display device
self.chapter_detection = chapter_detection #: Flag to toggle chapter detection self.chapter_detection = chapter_detection #: Flag to toggle chapter detection
@ -287,7 +289,8 @@ class HTMLConverter(object):
self.blockquote_style = book.create_block_style(sidemargin=60, self.blockquote_style = book.create_block_style(sidemargin=60,
topskip=20, footskip=20) topskip=20, footskip=20)
self.unindented_style = book.create_text_style(parindent=0) self.unindented_style = book.create_text_style(parindent=0)
self.page_break = page_break #: Regex controlling forced page-break behavior self.page_break = page_break #: Regex controlling page-break behavior
self.force_page_break = force_page_break #: Regex controlling forced page-break behavior
self.text_styles = []#: Keep track of already used textstyles self.text_styles = []#: Keep track of already used textstyles
self.block_styles = []#: Keep track of already used blockstyles self.block_styles = []#: Keep track of already used blockstyles
self.images = {} #: Images referenced in the HTML document self.images = {} #: Images referenced in the HTML document
@ -559,6 +562,7 @@ class HTMLConverter(object):
chapter_regex=self.chapter_regex, chapter_regex=self.chapter_regex,
link_exclude=self.link_exclude, link_exclude=self.link_exclude,
page_break=self.page_break, page_break=self.page_break,
force_page_break=self.force_page_break,
disable_autorotation=self.disable_autorotation) disable_autorotation=self.disable_autorotation)
HTMLConverter.processed_files[path] = self.files[path] HTMLConverter.processed_files[path] = self.files[path]
except Exception: except Exception:
@ -829,6 +833,9 @@ class HTMLConverter(object):
tag_css['page-break-after'].lower() != 'avoid': tag_css['page-break-after'].lower() != 'avoid':
end_page = True end_page = True
tag_css.pop('page-break-after') tag_css.pop('page-break-after')
if self.force_page_break.match(tagname):
self.end_page()
self.page_break_found = True
if not self.page_break_found and self.page_break.match(tagname): if not self.page_break_found and self.page_break.match(tagname):
if len(self.current_page.contents) > 3: if len(self.current_page.contents) > 3:
self.end_page() self.end_page()
@ -956,6 +963,7 @@ class HTMLConverter(object):
except ConversionError: except ConversionError:
pass pass
self.end_current_block() self.end_current_block()
self.current_block = self.book.create_text_block()
elif tagname in ['ul', 'ol']: elif tagname in ['ul', 'ol']:
self.in_ol = 1 if tagname == 'ol' else 0 self.in_ol = 1 if tagname == 'ol' else 0
self.end_current_block() self.end_current_block()
@ -1138,13 +1146,15 @@ def process_file(path, options):
re.compile('$') re.compile('$')
pb = re.compile(options.page_break, re.IGNORECASE) if options.page_break else \ pb = re.compile(options.page_break, re.IGNORECASE) if options.page_break else \
re.compile('$') re.compile('$')
fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \
re.compile('$')
conv = HTMLConverter(book, path, profile=options.profile, conv = HTMLConverter(book, path, profile=options.profile,
font_delta=options.font_delta, font_delta=options.font_delta,
cover=cpath, max_link_levels=options.link_levels, cover=cpath, max_link_levels=options.link_levels,
verbose=options.verbose, baen=options.baen, verbose=options.verbose, baen=options.baen,
chapter_detection=options.chapter_detection, chapter_detection=options.chapter_detection,
chapter_regex=re.compile(options.chapter_regex, re.IGNORECASE), chapter_regex=re.compile(options.chapter_regex, re.IGNORECASE),
link_exclude=re.compile(le), page_break=pb, link_exclude=re.compile(le), page_break=pb, force_page_break=fpb,
disable_autorotation=options.disable_autorotation) disable_autorotation=options.disable_autorotation)
conv.process_links() conv.process_links()
oname = options.output oname = options.output
@ -1220,23 +1230,14 @@ def try_opf(path, options):
def parse_options(argv=None, cli=True): def parse_options(argv=None, cli=True, parser=None):
""" CLI for html -> lrf conversions """ """ CLI for html -> lrf conversions """
if not argv: if not argv:
argv = sys.argv[1:] argv = sys.argv[1:]
if not parser:
parser = option_parser("""usage: %prog [options] mybook.[html|rar|zip] parser = option_parser("""usage: %prog [options] mybook.[html|rar|zip]
%prog converts mybook.html to mybook.lrf""") %prog converts mybook.html to mybook.lrf""")
laf = parser.add_option_group('LOOK AND FEEL')
laf.add_option('--cover', action='store', dest='cover', default=None, \
help='Path to file containing image to be used as cover')
laf.add_option('--font-delta', action='store', type='float', default=0., \
help="""Increase the font size by 2 * FONT_DELTA pts and """
'''the line spacing by FONT_DELTA pts. FONT_DELTA can be a fraction.'''
"""If FONT_DELTA is negative, the font size is decreased.""",
dest='font_delta')
laf.add_option('--disable-autorotation', action='store_true', default=False,
help='Disable autorotation of images.', dest='disable_autorotation')
link = parser.add_option_group('LINK PROCESSING OPTIONS') link = parser.add_option_group('LINK PROCESSING OPTIONS')
link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \ link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
dest='link_levels', dest='link_levels',
@ -1265,6 +1266,8 @@ def parse_options(argv=None, cli=True):
'''there are no really long pages as this degrades the page ''' '''there are no really long pages as this degrades the page '''
'''turn performance of the LRF. Thus this option is ignored ''' '''turn performance of the LRF. Thus this option is ignored '''
'''if the current page has only a few elements.''') '''if the current page has only a few elements.''')
chapter.add_option('--force-page-break-before', dest='force_page_break',
default='$', help='Like --page-break-before, but page breaks are forced.')
prepro = parser.add_option_group('PREPROCESSING OPTIONS') prepro = parser.add_option_group('PREPROCESSING OPTIONS')
prepro.add_option('--baen', action='store_true', default=False, dest='baen', prepro.add_option('--baen', action='store_true', default=False, dest='baen',
help='''Preprocess Baen HTML files to improve generated LRF.''') help='''Preprocess Baen HTML files to improve generated LRF.''')
@ -1285,7 +1288,8 @@ def main():
if options.verbose: if options.verbose:
import warnings import warnings
warnings.defaultaction = 'error' warnings.defaultaction = 'error'
except: except Exception, err:
print >> sys.stderr, err
sys.exit(1) sys.exit(1)
process_file(src, options) process_file(src, options)

View File

@ -12,7 +12,7 @@
## You should have received a copy of the GNU General Public License along ## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc., ## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import math, sys import math, sys, re
from libprs500.ebooks.lrf.fonts import get_font from libprs500.ebooks.lrf.fonts import get_font
from libprs500.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \ from libprs500.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
@ -215,7 +215,7 @@ class Row(object):
def __init__(self, conv, row, css, colpad): def __init__(self, conv, row, css, colpad):
self.cells = [] self.cells = []
self.colpad = colpad self.colpad = colpad
cells = row.findAll('td') cells = row.findAll(re.compile('td|th'))
for cell in cells: for cell in cells:
ccss = conv.tag_css(cell, css) ccss = conv.tag_css(cell, css)
self.cells.append(Cell(conv, cell, ccss)) self.cells.append(Cell(conv, cell, ccss))

View File

@ -15,19 +15,17 @@
""" """
Convert .txt files to .lrf Convert .txt files to .lrf
""" """
import os, sys import os, sys, codecs
from libprs500.ebooks import BeautifulSoup
from libprs500.ebooks.lrf import ConversionError, option_parser
from libprs500.ebooks.lrf import Book
from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, Italic, Bold, BookSetting
from libprs500 import filename_to_utf8
from libprs500 import iswindows from libprs500 import iswindows
from libprs500.ptempfile import PersistentTemporaryFile
from libprs500.ebooks.lrf import ConversionError, option_parser
from libprs500.ebooks.lrf.html.convert_from import parse_options as html_parse_options
from libprs500.ebooks.lrf.html.convert_from import process_file
from libprs500.ebooks.markdown import markdown
def parse_options(argv=None, cli=True): def parse_options(cli=True):
""" CLI for txt -> lrf conversions """ """ CLI for txt -> lrf conversions """
if not argv:
argv = sys.argv[1:]
parser = option_parser( parser = option_parser(
"""usage: %prog [options] mybook.txt """usage: %prog [options] mybook.txt
@ -44,84 +42,78 @@ def parse_options(argv=None, cli=True):
if cli: if cli:
parser.print_help() parser.print_help()
raise ConversionError, 'no filename specified' raise ConversionError, 'no filename specified'
if options.title == None:
options.title = filename_to_utf8(os.path.splitext(os.path.basename(args[0]))[0])
return options, args, parser return options, args, parser
def generate_html(txtfile, encoding):
    '''
    Convert txtfile to html and return a PersistentTemporaryFile object pointing
    to the file with the HTML.

    The text is decoded with the first candidate encoding that succeeds. An
    C{encoding} other than C{cp1252}/C{utf8} is tried first, then strict UTF-8,
    then the single byte code pages.

    @param txtfile: Path to the text (markdown) source file.
    @param encoding: Encoding requested by the caller; may be C{None}.
    @return: The already closed temporary file holding the generated HTML. It
             is created in the same directory as C{txtfile}.
    @raise ConversionError: If no candidate encoding can decode the file.
    '''
    # utf8 must come before the single byte code pages: iso-8859-1 maps every
    # possible byte, so any encoding listed after it is never reached and
    # UTF-8 input would silently be decoded as mojibake.
    candidates = ['utf8']
    if iswindows:
        candidates.append('cp1252')
    candidates.extend(['iso-8859-1', 'koi8_r', 'koi8_u'])
    if encoding and encoding not in ['cp1252', 'utf8']:
        candidates.insert(0, encoding)

    txt, enc = None, None
    for candidate in candidates:
        stream = codecs.open(txtfile, 'rb', candidate)
        try:
            txt = stream.read()
        except UnicodeDecodeError:
            continue
        finally:
            # Do not rely on garbage collection to release the handle
            stream.close()
        enc = candidate
        break
    if txt is None:
        raise ConversionError('Could not detect encoding of %s'%(txtfile,))

    md = markdown.Markdown(txt,
                           extensions=['footnotes', 'tables', 'toc'],
                           encoding=enc,
                           safe_mode=False,
                           )
    html = md.toString().decode(enc)
    p = PersistentTemporaryFile('.html', dir=os.path.dirname(txtfile))
    # Only the name is needed; the file is reopened below with an encoder
    p.close()
    out = codecs.open(p.name, 'wb', enc)
    try:
        out.write(html)
    finally:
        out.close()
    return p
def main(): def main():
try: try:
options, args, parser = parse_options() options, args, parser = parse_options()
src = os.path.abspath(os.path.expanduser(args[0])) txt = os.path.abspath(os.path.expanduser(args[0]))
except: p = generate_html(txt, options.encoding)
for i in range(1, len(sys.argv)):
if sys.argv[i] == args[0]:
sys.argv.remove(sys.argv[i])
break
sys.argv.append(p.name)
sys.argv.append('--force-page-break-before')
sys.argv.append('h2')
o_spec = False
for arg in sys.argv[1:]:
arg = arg.lstrip()
if arg.startswith('-o') or arg.startswith('--output'):
o_spec = True
break
ext = '.lrf'
for arg in sys.argv[1:]:
if arg.strip() == '--lrs':
ext = '.lrs'
break
if not o_spec:
sys.argv.append('-o')
sys.argv.append(os.path.splitext(os.path.basename(txt))[0]+ext)
options, args, parser = html_parse_options(parser=parser)
src = args[0]
if options.verbose:
import warnings
warnings.defaultaction = 'error'
except Exception, err:
print >> sys.stderr, err
import traceback
traceback.print_exc()
sys.exit(1) sys.exit(1)
print 'Output written to ', convert_txt(src, options) process_file(src, options)
def convert_txt(path, options):
"""
Convert the text file at C{path} into an lrf file.
@param options: Object with the following attributes:
C{author}, C{title}, C{encoding} (the assumed encoding of
the text in C{path}.)
"""
import codecs
header = None
if options.header:
header = Paragraph()
header.append(Bold(options.title))
header.append(' by ')
header.append(Italic(options.author))
title = (options.title, options.title_sort)
author = (options.author, options.author_sort)
book = Book(options, header=header, title=title, author=author, \
publisher=options.publisher,
sourceencoding=options.encoding, freetext=options.freetext, \
category=options.category, booksetting=BookSetting
(dpi=10*options.profile.dpi,
screenheight=options.profile.screen_height,
screenwidth=options.profile.screen_width))
buffer = ''
pg = book.create_page()
block = book.create_text_block()
pg.append(block)
book.append(pg)
lines = ""
try:
lines = codecs.open(path, 'rb', options.encoding).readlines()
except UnicodeDecodeError:
try:
lines = codecs.open(path, 'rb', 'cp1252').readlines()
except UnicodeDecodeError:
try:
lines = codecs.open(path, 'rb', 'iso-8859-1').readlines()
except UnicodeDecodeError:
try:
lines = codecs.open(path, 'rb', 'koi8_r').readlines()
except UnicodeDecodeError:
try:
lines = codecs.open(path, 'rb', 'koi8_u').readlines()
except UnicodeDecodeError:
lines = codecs.open(path, 'rb', 'utf8').readlines()
for line in lines:
line = line.strip()
if line:
buffer = buffer.rstrip() + ' ' + line
else:
block.Paragraph(buffer)
buffer = ''
basename = os.path.basename(path)
oname = options.output
if not oname:
oname = os.path.splitext(basename)[0]+('.lrs' if options.lrs else '.lrf')
oname = os.path.abspath(os.path.expanduser(oname))
try:
book.renderLrs(oname) if options.lrs else book.renderLrf(oname)
except UnicodeDecodeError:
raise ConversionError(path + ' is not encoded in ' + \
options.encoding +'. Specify the '+ \
'correct encoding with the -e option.')
return os.path.abspath(oname)
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -0,0 +1,5 @@
''' Package defines lightweight markup language for processing of txt files'''
# Initialize extensions
from libprs500.ebooks.markdown import mdx_footnotes
from libprs500.ebooks.markdown import mdx_tables
from libprs500.ebooks.markdown import mdx_toc

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,255 @@
"""
## To see this file as plain text go to
## http://freewisdom.org/projects/python-markdown/mdx_footnotes.raw_content
========================= FOOTNOTES =================================
This section adds footnote handling to markdown. It can be used as
an example for extending python-markdown with relatively complex
functionality. While in this case the extension is included inside
the module itself, it could just as easily be added from outside the
module. Note that all markdown classes above are ignorant about
footnotes. All footnote functionality is provided separately and
then added to the markdown instance at the run time.
Footnote functionality is attached by calling extendMarkdown()
method of FootnoteExtension. The method also registers the
extension to allow its state to be reset by a call to reset()
method.
"""
FN_BACKLINK_TEXT = "zz1337820767766393qq"
import re, markdown, random
class FootnoteExtension (markdown.Extension):
    """Markdown extension that collects footnote definitions and references
    and renders them as a <div class="footnote"> block.

    State kept on the instance:
      - footnotes: maps footnote id -> definition text
      - used_footnotes: maps footnote id -> number assigned on first use
      - footnote_suffix: random suffix that makes generated ids unique
    """

    # A definition line such as "[^1]: text", indented by at most three spaces
    DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
    # A reference inside running text
    SHORT_USE_RE = re.compile(r'\[\^([^\]]*)\]', re.M) # [^a]

    def __init__ (self, configs) :
        """
        @param configs: iterable of (key, value) pairs that override entries
                        of self.config. None means "no overrides".
        """
        self.config = {'PLACE_MARKER' :
                       ["///Footnotes Go Here///",
                        "The text string that marks where the footnotes go"]}
        # makeExtension() defaults configs to None, which cannot be iterated
        for key, value in (configs or []) :
            self.config[key][0] = value
        self.reset()

    def extendMarkdown(self, md, md_globals) :
        """Hook the footnote processors and the inline pattern into md.

        @param md: the Markdown instance being extended
        @param md_globals: mapping that provides 'REFERENCE_PREPROCESSOR'
                           and 'IMAGE_REFERENCE_PATTERN'
        """
        self.md = md

        # Stateless extensions do not need to be registered
        md.registerExtension(self)

        # Insert a preprocessor before ReferencePreprocessor
        index = md.preprocessors.index(md_globals['REFERENCE_PREPROCESSOR'])
        preprocessor = FootnotePreprocessor(self)
        preprocessor.md = md
        md.preprocessors.insert(index, preprocessor)

        # Insert an inline pattern before ImageReferencePattern
        FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
        index = md.inlinePatterns.index(md_globals['IMAGE_REFERENCE_PATTERN'])
        md.inlinePatterns.insert(index, FootnotePattern(FOOTNOTE_RE, self))

        # Insert a post-processor that would actually add the footnote div
        postprocessor = FootnotePostprocessor(self)
        postprocessor.extension = self
        md.postprocessors.append(postprocessor)

        textPostprocessor = FootnoteTextPostprocessor(self)
        md.textPostprocessors.append(textPostprocessor)

    def reset(self) :
        """Forget all recorded footnotes and pick a fresh id suffix."""
        # May be called by Markdown if state reset is desired
        self.footnote_suffix = "-" + str(int(random.random()*1000000000))
        self.used_footnotes = {}
        self.footnotes = {}

    def findFootnotesPlaceholder(self, doc) :
        """Return the first text node containing the PLACE_MARKER string,
        or None when doc.find() yields nothing."""
        def findFootnotePlaceholderFn(node=None, indent=0):
            if node.type == 'text':
                if node.value.find(self.getConfig("PLACE_MARKER")) > -1 :
                    return True

        fn_div_list = doc.find(findFootnotePlaceholderFn)
        if fn_div_list :
            return fn_div_list[0]

    def setFootnote(self, id, text) :
        """Store the definition text of footnote id."""
        self.footnotes[id] = text

    def makeFootnoteId(self, num) :
        """Return the id attribute of the <li> of footnote number num."""
        return 'fn%d%s' % (num, self.footnote_suffix)

    def makeFootnoteRefId(self, num) :
        """Return the id attribute of the in-text reference number num."""
        return 'fnr%d%s' % (num, self.footnote_suffix)

    def makeFootnotesDiv (self, doc) :
        """Creates the div with class='footnote' and populates it with
        the text of the footnotes.

        Definitions that are never referenced in the text have no number
        and are left out.

        @returns: the footnote div as a dom element, or None when there is
                  nothing to render """
        referenced = [(self.used_footnotes[fn_id], fn_id)
                      for fn_id in self.footnotes.keys()
                      if fn_id in self.used_footnotes]
        if not referenced :
            return None
        # Order by the number handed out at first use
        referenced.sort()

        div = doc.createElement("div")
        div.setAttribute('class', 'footnote')
        hr = doc.createElement("hr")
        div.appendChild(hr)
        ol = doc.createElement("ol")
        div.appendChild(ol)

        for i, fn_id in referenced :
            li = doc.createElement('li')
            li.setAttribute('id', self.makeFootnoteId(i))

            self.md._processSection(li, self.footnotes[fn_id].split("\n"))

            backlink = doc.createElement('a')
            backlink.setAttribute('href', '#' + self.makeFootnoteRefId(i))
            backlink.setAttribute('class', 'footnoteBackLink')
            # The title names the footnote this link belongs to
            backlink.setAttribute('title',
                                  'Jump back to footnote %d in the text' % i)
            # Placeholder text; FootnoteTextPostprocessor swaps it later
            backlink.appendChild(doc.createTextNode(FN_BACKLINK_TEXT))

            if li.childNodes :
                node = li.childNodes[-1]
                # A trailing text node cannot hold children, so attach the
                # link to the <li> itself in that case
                if node.type == "text" :
                    node = li
                node.appendChild(backlink)

            ol.appendChild(li)

        return div
class FootnotePreprocessor :
    """Strips footnote definitions from the source lines and numbers the
    footnote references in order of first appearance."""

    def __init__ (self, footnotes) :
        """
        @param footnotes: the owning extension object; it must provide
                          DEF_RE, SHORT_USE_RE, used_footnotes and
                          setFootnote()
        """
        self.footnotes = footnotes

    def run(self, lines) :
        """Remove footnote definitions and record every footnote use.

        @param lines: a list of lines of text
        @returns: the list of lines without the footnote definitions
        """
        self.blockGuru = markdown.BlockGuru()
        lines = self._handleFootnoteDefinitions (lines)

        # Make a hash of all footnote marks in the text so that we
        # know in what order they are supposed to appear.  (This
        # function call doesn't really substitute anything - it's just
        # a way to get a callback for each occurrence.)
        text = "\n".join(lines)
        self.footnotes.SHORT_USE_RE.sub(self.recordFootnoteUse, text)

        return text.split("\n")

    def recordFootnoteUse(self, match) :
        """Assign the next free number to a footnote on its first use.

        @param match: a match of SHORT_USE_RE; group 1 is the footnote id
        """
        fn_id = match.group(1).strip()
        used = self.footnotes.used_footnotes
        # A repeated reference keeps its number. Renumbering it would make
        # it collide with the footnote that is introduced next.
        if fn_id not in used :
            used[fn_id] = len(used) + 1

    def _handleFootnoteDefinitions(self, lines) :
        """Recursively finds all footnote definitions in the lines.

        @param lines: a list of lines of text
        @returns: a list of lines with the footnote definitions removed;
                  each removed definition leaves one empty line behind """
        i, fn_id, footnote = self._findFootnoteDefinition(lines)

        if fn_id :
            plain = lines[:i]
            # The indented lines that follow belong to the same footnote
            detabbed, theRest = self.blockGuru.detectTabbed(lines[i+1:])
            self.footnotes.setFootnote(fn_id,
                                       footnote + "\n"
                                       + "\n".join(detabbed))
            more_plain = self._handleFootnoteDefinitions(theRest)
            return plain + [""] + more_plain
        else :
            return lines

    def _findFootnoteDefinition(self, lines) :
        """Finds the first line of a footnote definition.

        @param lines: a list of lines of text
        @returns: a tuple (index, id, text) for the first matching line, or
                  (len(lines), None, None) when no line matches """
        for index, line in enumerate(lines) :
            m = self.footnotes.DEF_RE.match(line)
            if m :
                return index, m.group(2), m.group(3)
        return len(lines), None, None
class FootnotePattern (markdown.Pattern) :
    """Inline pattern that turns a footnote reference into a superscript
    link pointing at the footnote text."""

    def __init__ (self, pattern, footnotes) :
        """
        @param pattern: regular expression source for a footnote reference
        @param footnotes: the owning extension object (provides
                          used_footnotes and the id helpers)
        """
        markdown.Pattern.__init__(self, pattern)
        self.footnotes = footnotes

    def handleMatch(self, m, doc) :
        """Build <sup id=...><a href=...>N</a></sup> for one reference.

        @param m: the match object; group 2 holds the footnote id
        @param doc: dom document used to create the nodes
        @returns: the <sup> element
        """
        sup = doc.createElement('sup')
        a = doc.createElement('a')
        sup.appendChild(a)
        # The preprocessor records ids stripped of surrounding whitespace,
        # so the lookup key has to be stripped as well
        fn_id = m.group(2).strip()
        num = self.footnotes.used_footnotes[fn_id]
        sup.setAttribute('id', self.footnotes.makeFootnoteRefId(num))
        a.setAttribute('href', '#' + self.footnotes.makeFootnoteId(num))
        a.appendChild(doc.createTextNode(str(num)))
        return sup
class FootnotePostprocessor (markdown.Postprocessor):
    """Post-processor that inserts the rendered footnote div into the
    document."""

    def __init__ (self, footnotes) :
        """
        @param footnotes: the owning extension object
        """
        self.footnotes = footnotes
        # run() reads self.extension; set it here so that the object works
        # even when the caller does not assign the attribute afterwards
        self.extension = footnotes

    def run(self, doc) :
        """Put the footnote div in place of the placeholder text node, or
        append it to the document element when there is no placeholder.

        @param doc: the dom document to modify
        """
        footnotesDiv = self.footnotes.makeFootnotesDiv(doc)
        if footnotesDiv :
            fnPlaceholder = self.extension.findFootnotesPlaceholder(doc)
            if fnPlaceholder :
                fnPlaceholder.parent.replaceChild(fnPlaceholder, footnotesDiv)
            else :
                doc.documentElement.appendChild(footnotesDiv)
class FootnoteTextPostprocessor (markdown.Postprocessor):
    """Text post-processor that swaps the back-link placeholder string for
    the arrow character entity."""

    def __init__ (self, footnotes) :
        """
        @param footnotes: the owning extension object
        """
        self.footnotes = footnotes

    def run(self, text) :
        """
        @param text: the serialized document
        @returns: text with every FN_BACKLINK_TEXT replaced by the entity
        """
        arrow_entity = "&#8617;"
        pieces = text.split(FN_BACKLINK_TEXT)
        return arrow_entity.join(pieces)
def makeExtension(configs=None) :
    """Factory for the footnote extension.

    @param configs: iterable of (key, value) configuration pairs, or None
    @returns: a new FootnoteExtension
    """
    # FootnoteExtension.__init__ iterates over configs, so the None default
    # must not be passed through unchanged
    return FootnoteExtension(configs=configs or [])

View File

@ -0,0 +1,65 @@
#!/usr/bin/env python
"""
Table extension for Python-Markdown
"""
import markdown
class TablePattern(markdown.Pattern) :
    """Inline pattern that converts one "|a|b|" line into a <tr> element."""

    def __init__ (self, md):
        """
        @param md: the Markdown instance; its _handleInline() is applied to
                   the content of every cell
        """
        markdown.Pattern.__init__(self, r'^\|([^\n]*)\|(\n|$)')
        self.md = md

    def handleMatch(self, m, doc) :
        """Build the table row for one matched line.

        @param m: the match object; group 2 holds the text between the
                  outer pipes
        @param doc: dom document used to create the nodes
        @returns: the <tr> element
        """
        # a single line represents a row
        tr = doc.createElement('tr')
        tr.appendChild(doc.createTextNode('\n'))
        # chunks between pipes represent cells
        for t in m.group(2).split('|'):
            if len(t) >= 2 and t.startswith('*') and t.endswith('*'):
                # if a cell is bounded by asterisks, it is a <th>
                td = doc.createElement('th')
                t = t[1:-1]
            else:
                # otherwise it is a <td>
                td = doc.createElement('td')
            # apply inline patterns on chunks
            for n in self.md._handleInline(t):
                # isinstance also accepts subclasses of unicode, which an
                # exact type comparison would treat as dom nodes
                if isinstance(n, unicode):
                    td.appendChild(doc.createTextNode(n))
                else:
                    td.appendChild(n)
            tr.appendChild(td)
            # very long lines are evil
            tr.appendChild(doc.createTextNode('\n'))
        return tr
class TablePostprocessor:
    """Post-processor that turns a <p> wrapping table rows into a <table>."""

    def run(self, doc):
        """Rename every paragraph whose first child, ignoring leading
        whitespace-only text nodes, is a <tr> element.

        @param doc: dom document providing find(); nodes are changed in place
        """
        # markdown wrapped our <tr>s in a <p>, we fix that here
        def is_paragraph(candidate):
            return candidate.type == 'element' and candidate.nodeName == 'p'

        # replace "p > tr" with "table > tr"
        for paragraph in doc.find(is_paragraph):
            first_real = None
            for child in paragraph.childNodes:
                blank_text = child.type == 'text' and child.value.strip() == ''
                if not blank_text:
                    # only the first non-blank child decides
                    first_real = child
                    break
            if first_real is None:
                continue
            if first_real.type == 'element' and first_real.nodeName == 'tr':
                paragraph.nodeName = 'table'
class TableExtension(markdown.Extension):
    """Markdown extension that adds pipe-delimited table rows."""

    def extendMarkdown(self, md, md_globals):
        """Install the row pattern ahead of all other inline patterns and
        add the paragraph-to-table post-processor at the end.

        @param md: the Markdown instance being extended
        @param md_globals: not used by this extension
        """
        row_pattern = TablePattern(md)
        md.inlinePatterns.insert(0, row_pattern)
        table_fixer = TablePostprocessor()
        md.postprocessors.append(table_fixer)
def makeExtension(configs=None):
    """Factory for the table extension.

    configs defaults to None so that the signature matches the
    makeExtension() factories of the footnote and toc extensions.

    @param configs: configuration handed unchanged to TableExtension
    @returns: a new TableExtension
    """
    return TableExtension(configs)

View File

@ -0,0 +1,165 @@
## To access this file as plain text go to
## http://freewisdom.org/projects/python-markdown/mdx_toc.raw_content
"""
Chris Clark - clach04 -at- sf.net
My markdown extensions for adding:
Table of Contents (aka toc)
"""
import os
import sys
import re
import markdown
DEFAULT_TITLE = None
def extract_alphanumeric(in_str=None):
    """Title-case in_str and return only its alphanumeric characters.

    Characters are kept when isalnum() is true for them, so for unicode
    input the result is not limited to 7bit ascii.

    @param in_str: the text to condense; None or '' yields ''
    @returns: the condensed string, e.g. 'hello world!' -> 'HelloWorld'
    """
    # The default argument is None, which has no title() method
    if not in_str:
        return ''
    return ''.join([ch for ch in in_str.title() if ch.isalnum()])
class TitlePostprocessor (markdown.Postprocessor):
    """Post-processor that puts a title element at the top of the document."""

    def __init__ (self, extension) :
        """
        @param extension: object whose createTitle(doc) builds the title
        """
        self.extension = extension

    def run(self, doc) :
        """Insert the title as first child of the document element when
        createTitle() returns one.

        @param doc: the dom document to modify
        """
        # NOTE(review): no createTitle() is visible on TocExtension in this
        # module - confirm which extension is meant to provide it.
        title_node = self.extension.createTitle(doc)
        if not title_node :
            return
        doc.documentElement.insertChild(0, title_node)
class TocExtension (markdown.Extension):
"""Markdown extension: generate a Table Of Contents (aka toc)
toc is returned in a div tag with class='toc'
toc is either:
appended to end of document
OR
replaces first string occurence of "///Table of Contents Goes Here///"
"""
# NOTE(review): this listing has lost its indentation; the nesting has to be
# restored from the upstream mdx_toc.py before the class can be executed.
# NOTE(review): the marker that is actually searched for is
# TOC_INCLUDE_MARKER ("///Table of Contents///", set in __init__), not the
# string quoted in the docstring above.
def __init__ (self) :
# Takes no configs and does not call the base class __init__
#maybe add these as parameters to the class init?
# Text that marks where the toc should be placed
self.TOC_INCLUDE_MARKER = "///Table of Contents///"
# Heading text placed above the list; a false value suppresses the heading
self.TOC_TITLE = "Table Of Contents"
# Heading level (the N of <hN>) that is compared against each header
self.auto_toc_heading_type=2
# Heading level used for the toc's own title element
self.toc_heading_type=3
def extendMarkdown(self, md, md_globals) :
# md is the Markdown instance being extended; md_globals is not used here
# Just insert in the end
md.postprocessors.append(TocPostprocessor(self))
# Stateless extensions do not need to be registered, so we don't
# register.
def findTocPlaceholder(self, doc) :
# Returns the first text node whose value contains TOC_INCLUDE_MARKER, or
# None (implicitly) when doc.find() yields nothing
def findTocPlaceholderFn(node=None, indent=0):
if node.type == 'text':
if node.value.find(self.TOC_INCLUDE_MARKER) > -1 :
return True
toc_div_list = doc.find(findTocPlaceholderFn)
if toc_div_list :
return toc_div_list[0]
def createTocDiv(self, doc) :
"""
Creates Table Of Contents based on headers.
@returns: toc as a single as a dom element
in a <div> tag with class='toc'
"""
# Find headers
headers_compiled_re = re.compile("h[123456]", re.IGNORECASE)
def findHeadersFn(element=None):
if element.type=='element':
if headers_compiled_re.match(element.nodeName):
return True
headers_doc_list = doc.find(findHeadersFn)
# Insert anchor tags into dom
generated_anchor_id=0
headers_list=[]
min_header_size_found = 6
for element in headers_doc_list:
# Only the first child node of the header supplies the title text
# NOTE(review): assumes that child exists and has a .value - confirm for
# empty headers and headers that start with inline markup
heading_title = element.childNodes[0].value
if heading_title.strip() !="":
# Heading level is the last character of the tag name
heading_type = int(element.nodeName[-1:])
if heading_type == self.auto_toc_heading_type:
# min_header_size_found is not read again anywhere in this method
min_header_size_found=min(min_header_size_found,
heading_type)
# Anchor name: condensed title plus a running counter
html_anchor_name= (extract_alphanumeric(heading_title)
+'__MD_autoTOC_%d' % (generated_anchor_id))
# insert anchor tag inside header tags
html_anchor = doc.createElement("a")
html_anchor.setAttribute('name', html_anchor_name)
element.appendChild(html_anchor)
headers_list.append( (heading_type, heading_title,
html_anchor_name) )
generated_anchor_id = generated_anchor_id + 1
# create dom for toc
if headers_list != []:
# Create list
toc_doc_list = doc.createElement("ul")
for (heading_type, heading_title, html_anchor_name) in headers_list:
# presumably only headings of level auto_toc_heading_type get an entry
if heading_type == self.auto_toc_heading_type:
toc_doc_entry = doc.createElement("li")
toc_doc_link = doc.createElement("a")
toc_doc_link.setAttribute('href', '#'+html_anchor_name)
toc_doc_text = doc.createTextNode(heading_title)
toc_doc_link.appendChild(toc_doc_text)
toc_doc_entry.appendChild(toc_doc_link)
toc_doc_list.appendChild(toc_doc_entry)
# Put list into div
div = doc.createElement("div")
div.setAttribute('class', 'toc')
if self.TOC_TITLE:
toc_header = doc.createElement("h%d"%(self.toc_heading_type) )
toc_header_text = doc.createTextNode(self.TOC_TITLE)
toc_header.appendChild(toc_header_text)
div.appendChild(toc_header)
div.appendChild(toc_doc_list)
#hr = doc.createElement("hr")
#div.appendChild(hr)
return div
class TocPostprocessor (markdown.Postprocessor):
    """Post-processor that builds the table of contents and inserts it into
    the document."""

    def __init__ (self, toc) :
        """
        @param toc: the extension object providing findTocPlaceholder() and
                    createTocDiv()
        """
        self.toc = toc

    def run(self, doc):
        """Place the toc div at the placeholder, or at the end of the
        document element when there is no placeholder.

        @param doc: the dom document to modify
        """
        marker_node = self.toc.findTocPlaceholder(doc)
        toc_div = self.toc.createTocDiv(doc)
        if not toc_div:
            # nothing was generated, leave the document alone
            return
        if marker_node :
            # Replace "magic" pattern with toc
            marker_node.parent.replaceChild(marker_node, toc_div)
            return
        # Dump at the end of the DOM
        # Probably want to use CSS to position div
        doc.documentElement.appendChild(toc_div)
def makeExtension(configs=None) :
    """Factory for the toc extension.

    @param configs: accepted so that the signature matches the other
                    extensions; it is not passed on to TocExtension
    @returns: a new TocExtension
    """
    toc_extension = TocExtension()
    return toc_extension

View File

@ -47,7 +47,7 @@ class _TemporaryFileWrapper(object):
os.remove(self.name) os.remove(self.name)
def PersistentTemporaryFile(suffix="", prefix=""): def PersistentTemporaryFile(suffix="", prefix="", dir=None):
""" """
Return a temporary file that is available even after being closed on Return a temporary file that is available even after being closed on
all platforms. It is automatically deleted when this object is deleted. all platforms. It is automatically deleted when this object is deleted.
@ -55,6 +55,7 @@ def PersistentTemporaryFile(suffix="", prefix=""):
""" """
if prefix == None: if prefix == None:
prefix = "" prefix = ""
fd, name = tempfile.mkstemp(suffix, "libprs500_"+ __version__+"_" + prefix) fd, name = tempfile.mkstemp(suffix, "libprs500_"+ __version__+"_" + prefix,
dir=dir)
_file = os.fdopen(fd, "wb") _file = os.fdopen(fd, "wb")
return _TemporaryFileWrapper(_file, name) return _TemporaryFileWrapper(_file, name)

View File

@ -9,6 +9,7 @@ PREFIX = "/var/www/vhosts/kovidgoyal.net/subdomains/libprs500"
DOWNLOADS = PREFIX+"/httpdocs/downloads" DOWNLOADS = PREFIX+"/httpdocs/downloads"
DOCS = PREFIX+"/httpdocs/apidocs" DOCS = PREFIX+"/httpdocs/apidocs"
HTML2LRF = "src/libprs500/ebooks/lrf/html/demo" HTML2LRF = "src/libprs500/ebooks/lrf/html/demo"
TXT2LRF = "src/libprs500/ebooks/lrf/txt/demo"
check_call = partial(_check_call, shell=True) check_call = partial(_check_call, shell=True)
h = Host(hostType=VIX_SERVICEPROVIDER_VMWARE_WORKSTATION) h = Host(hostType=VIX_SERVICEPROVIDER_VMWARE_WORKSTATION)
@ -19,7 +20,7 @@ def build_windows():
vm = h.openVM('/mnt/extra/vmware/Windows Vista/Windows Vista.vmx') vm = h.openVM('/mnt/backup/vmware/Windows Vista/Windows Vista.vmx')
vm.powerOn() vm.powerOn()
if not vm.waitForToolsInGuest(): if not vm.waitForToolsInGuest():
print >>sys.stderr, 'Windows is not booting up' print >>sys.stderr, 'Windows is not booting up'
@ -43,7 +44,7 @@ def build_osx():
if os.path.exists('dist/dmgdone'): if os.path.exists('dist/dmgdone'):
os.unlink('dist/dmgdone') os.unlink('dist/dmgdone')
vm = h.openVM('/mnt/extra/vmware/Mac OSX/Mac OSX.vmx') vm = h.openVM('/mnt/backup/vmware/Mac OSX/Mac OSX.vmx')
vm.powerOn() vm.powerOn()
c = 25 * 60 c = 25 * 60
print 'Waiting (minutes):', print 'Waiting (minutes):',
@ -69,6 +70,8 @@ def upload_demo():
f.close() f.close()
check_call('''html2lrf --title='Demonstration of html2lrf' --author='Kovid Goyal' --header --output=/tmp/html2lrf.lrf %s/demo.html'''%(HTML2LRF,)) check_call('''html2lrf --title='Demonstration of html2lrf' --author='Kovid Goyal' --header --output=/tmp/html2lrf.lrf %s/demo.html'''%(HTML2LRF,))
check_call('''scp /tmp/html2lrf.lrf castalia:%s/'''%(DOWNLOADS,)) check_call('''scp /tmp/html2lrf.lrf castalia:%s/'''%(DOWNLOADS,))
check_call('''txt2lrf -t 'Demonstration of txt2lrf' -a 'Kovid Goyal' --header -o /tmp/txt2lrf.lrf %s/demo.txt'''%(TXT2LRF,) )
check_call('''scp /tmp/txt2lrf.lrf castalia:%s/'''%(DOWNLOADS,))
def upload_installers(exe, dmg): def upload_installers(exe, dmg):
check_call('''ssh castalia rm -f %s/libprs500\*.exe'''%(DOWNLOADS,)) check_call('''ssh castalia rm -f %s/libprs500\*.exe'''%(DOWNLOADS,))