mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-07 10:14:46 -04:00
Implement support for markdown in txt2lrf. Fix handling of <pre>, <th> elements in html2lrf
This commit is contained in:
parent
a082131823
commit
9178ddf18e
1
setup.py
1
setup.py
@ -26,6 +26,7 @@ entry_points = {
|
||||
'rtf-meta = libprs500.ebooks.metadata.rtf:main', \
|
||||
'txt2lrf = libprs500.ebooks.lrf.txt.convert_from:main', \
|
||||
'html2lrf = libprs500.ebooks.lrf.html.convert_from:main',\
|
||||
'markdown = libprs500.ebooks.markdown.markdown:main',\
|
||||
],
|
||||
'gui_scripts' : [ APPNAME+' = libprs500.gui.main:main']
|
||||
}
|
||||
|
@ -13,7 +13,7 @@
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
''' E-book management software'''
|
||||
__version__ = "0.3.52"
|
||||
__version__ = "0.3.53"
|
||||
__docformat__ = "epytext"
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
__appname__ = 'libprs500'
|
||||
|
@ -73,6 +73,16 @@ def option_parser(usage):
|
||||
profiles=['prs500']
|
||||
parser.add_option('-o', '--output', action='store', default=None, \
|
||||
help='Output file name. Default is derived from input filename')
|
||||
laf = parser.add_option_group('LOOK AND FEEL')
|
||||
laf.add_option('--cover', action='store', dest='cover', default=None, \
|
||||
help='Path to file containing image to be used as cover')
|
||||
laf.add_option('--font-delta', action='store', type='float', default=0., \
|
||||
help="""Increase the font size by 2 * FONT_DELTA pts and """
|
||||
'''the line spacing by FONT_DELTA pts. FONT_DELTA can be a fraction.'''
|
||||
"""If FONT_DELTA is negative, the font size is decreased.""",
|
||||
dest='font_delta')
|
||||
laf.add_option('--disable-autorotation', action='store_true', default=False,
|
||||
help='Disable autorotation of images.', dest='disable_autorotation')
|
||||
page = parser.add_option_group('PAGE OPTIONS')
|
||||
page.add_option('-p', '--profile', default=PRS500_PROFILE, dest='profile', type='choice',
|
||||
choices=profiles, action='callback', callback=profile_from_string,
|
||||
|
@ -221,6 +221,7 @@ class HTMLConverter(object):
|
||||
chapter_regex=re.compile('chapter|book|appendix', re.IGNORECASE),
|
||||
link_exclude=re.compile('$'),
|
||||
page_break=re.compile('h[12]', re.IGNORECASE),
|
||||
force_page_break=re.compile('$', re.IGNORECASE),
|
||||
profile=PRS500_PROFILE,
|
||||
disable_autorotation=False):
|
||||
'''
|
||||
@ -273,7 +274,8 @@ class HTMLConverter(object):
|
||||
small = {'font-size' :'small'},
|
||||
pre = {'font-family' :'monospace' },
|
||||
tt = {'font-family' :'monospace'},
|
||||
center = {'text-align' : 'center'}
|
||||
center = {'text-align' : 'center'},
|
||||
th = {'font-size':'large', 'font-weight':'bold'},
|
||||
)
|
||||
self.profile = profile #: Defines the geometry of the display device
|
||||
self.chapter_detection = chapter_detection #: Flag to toggle chapter detection
|
||||
@ -287,7 +289,8 @@ class HTMLConverter(object):
|
||||
self.blockquote_style = book.create_block_style(sidemargin=60,
|
||||
topskip=20, footskip=20)
|
||||
self.unindented_style = book.create_text_style(parindent=0)
|
||||
self.page_break = page_break #: Regex controlling forced page-break behavior
|
||||
self.page_break = page_break #: Regex controlling page-break behavior
|
||||
self.force_page_break = force_page_break #: Regex controlling forced page-break behavior
|
||||
self.text_styles = []#: Keep track of already used textstyles
|
||||
self.block_styles = []#: Keep track of already used blockstyles
|
||||
self.images = {} #: Images referenced in the HTML document
|
||||
@ -559,6 +562,7 @@ class HTMLConverter(object):
|
||||
chapter_regex=self.chapter_regex,
|
||||
link_exclude=self.link_exclude,
|
||||
page_break=self.page_break,
|
||||
force_page_break=self.force_page_break,
|
||||
disable_autorotation=self.disable_autorotation)
|
||||
HTMLConverter.processed_files[path] = self.files[path]
|
||||
except Exception:
|
||||
@ -829,6 +833,9 @@ class HTMLConverter(object):
|
||||
tag_css['page-break-after'].lower() != 'avoid':
|
||||
end_page = True
|
||||
tag_css.pop('page-break-after')
|
||||
if self.force_page_break.match(tagname):
|
||||
self.end_page()
|
||||
self.page_break_found = True
|
||||
if not self.page_break_found and self.page_break.match(tagname):
|
||||
if len(self.current_page.contents) > 3:
|
||||
self.end_page()
|
||||
@ -956,6 +963,7 @@ class HTMLConverter(object):
|
||||
except ConversionError:
|
||||
pass
|
||||
self.end_current_block()
|
||||
self.current_block = self.book.create_text_block()
|
||||
elif tagname in ['ul', 'ol']:
|
||||
self.in_ol = 1 if tagname == 'ol' else 0
|
||||
self.end_current_block()
|
||||
@ -1138,13 +1146,15 @@ def process_file(path, options):
|
||||
re.compile('$')
|
||||
pb = re.compile(options.page_break, re.IGNORECASE) if options.page_break else \
|
||||
re.compile('$')
|
||||
fpb = re.compile(options.force_page_break, re.IGNORECASE) if options.force_page_break else \
|
||||
re.compile('$')
|
||||
conv = HTMLConverter(book, path, profile=options.profile,
|
||||
font_delta=options.font_delta,
|
||||
cover=cpath, max_link_levels=options.link_levels,
|
||||
verbose=options.verbose, baen=options.baen,
|
||||
chapter_detection=options.chapter_detection,
|
||||
chapter_regex=re.compile(options.chapter_regex, re.IGNORECASE),
|
||||
link_exclude=re.compile(le), page_break=pb,
|
||||
link_exclude=re.compile(le), page_break=pb, force_page_break=fpb,
|
||||
disable_autorotation=options.disable_autorotation)
|
||||
conv.process_links()
|
||||
oname = options.output
|
||||
@ -1220,23 +1230,14 @@ def try_opf(path, options):
|
||||
|
||||
|
||||
|
||||
def parse_options(argv=None, cli=True):
|
||||
def parse_options(argv=None, cli=True, parser=None):
|
||||
""" CLI for html -> lrf conversions """
|
||||
if not argv:
|
||||
argv = sys.argv[1:]
|
||||
if not parser:
|
||||
parser = option_parser("""usage: %prog [options] mybook.[html|rar|zip]
|
||||
|
||||
%prog converts mybook.html to mybook.lrf""")
|
||||
laf = parser.add_option_group('LOOK AND FEEL')
|
||||
laf.add_option('--cover', action='store', dest='cover', default=None, \
|
||||
help='Path to file containing image to be used as cover')
|
||||
laf.add_option('--font-delta', action='store', type='float', default=0., \
|
||||
help="""Increase the font size by 2 * FONT_DELTA pts and """
|
||||
'''the line spacing by FONT_DELTA pts. FONT_DELTA can be a fraction.'''
|
||||
"""If FONT_DELTA is negative, the font size is decreased.""",
|
||||
dest='font_delta')
|
||||
laf.add_option('--disable-autorotation', action='store_true', default=False,
|
||||
help='Disable autorotation of images.', dest='disable_autorotation')
|
||||
link = parser.add_option_group('LINK PROCESSING OPTIONS')
|
||||
link.add_option('--link-levels', action='store', type='int', default=sys.maxint, \
|
||||
dest='link_levels',
|
||||
@ -1265,6 +1266,8 @@ def parse_options(argv=None, cli=True):
|
||||
'''there are no really long pages as this degrades the page '''
|
||||
'''turn performance of the LRF. Thus this option is ignored '''
|
||||
'''if the current page has only a few elements.''')
|
||||
chapter.add_option('--force-page-break-before', dest='force_page_break',
|
||||
default='$', help='Like --page-break-before, but page breaks are forced.')
|
||||
prepro = parser.add_option_group('PREPROCESSING OPTIONS')
|
||||
prepro.add_option('--baen', action='store_true', default=False, dest='baen',
|
||||
help='''Preprocess Baen HTML files to improve generated LRF.''')
|
||||
@ -1285,7 +1288,8 @@ def main():
|
||||
if options.verbose:
|
||||
import warnings
|
||||
warnings.defaultaction = 'error'
|
||||
except:
|
||||
except Exception, err:
|
||||
print >> sys.stderr, err
|
||||
sys.exit(1)
|
||||
process_file(src, options)
|
||||
|
||||
|
@ -12,7 +12,7 @@
|
||||
## You should have received a copy of the GNU General Public License along
|
||||
## with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
import math, sys
|
||||
import math, sys, re
|
||||
|
||||
from libprs500.ebooks.lrf.fonts import get_font
|
||||
from libprs500.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
|
||||
@ -215,7 +215,7 @@ class Row(object):
|
||||
def __init__(self, conv, row, css, colpad):
|
||||
self.cells = []
|
||||
self.colpad = colpad
|
||||
cells = row.findAll('td')
|
||||
cells = row.findAll(re.compile('td|th'))
|
||||
for cell in cells:
|
||||
ccss = conv.tag_css(cell, css)
|
||||
self.cells.append(Cell(conv, cell, ccss))
|
||||
|
@ -15,19 +15,17 @@
|
||||
"""
|
||||
Convert .txt files to .lrf
|
||||
"""
|
||||
import os, sys
|
||||
import os, sys, codecs
|
||||
|
||||
from libprs500.ebooks import BeautifulSoup
|
||||
from libprs500.ebooks.lrf import ConversionError, option_parser
|
||||
from libprs500.ebooks.lrf import Book
|
||||
from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, Italic, Bold, BookSetting
|
||||
from libprs500 import filename_to_utf8
|
||||
from libprs500 import iswindows
|
||||
from libprs500.ptempfile import PersistentTemporaryFile
|
||||
from libprs500.ebooks.lrf import ConversionError, option_parser
|
||||
from libprs500.ebooks.lrf.html.convert_from import parse_options as html_parse_options
|
||||
from libprs500.ebooks.lrf.html.convert_from import process_file
|
||||
from libprs500.ebooks.markdown import markdown
|
||||
|
||||
def parse_options(argv=None, cli=True):
|
||||
def parse_options(cli=True):
|
||||
""" CLI for txt -> lrf conversions """
|
||||
if not argv:
|
||||
argv = sys.argv[1:]
|
||||
parser = option_parser(
|
||||
"""usage: %prog [options] mybook.txt
|
||||
|
||||
@ -44,84 +42,78 @@ def parse_options(argv=None, cli=True):
|
||||
if cli:
|
||||
parser.print_help()
|
||||
raise ConversionError, 'no filename specified'
|
||||
if options.title == None:
|
||||
options.title = filename_to_utf8(os.path.splitext(os.path.basename(args[0]))[0])
|
||||
return options, args, parser
|
||||
|
||||
def generate_html(txtfile, encoding):
    '''
    Convert txtfile to html and return a PersistentTemporaryFile object pointing
    to the file with the HTML.

    @param txtfile: Path to the text (markdown) file to convert.
    @param encoding: Encoding requested by the caller. It is tried first unless
    it is C{cp1252} or C{utf8}, which are handled by the built-in candidates.
    @return: The (already closed) temporary file holding the generated HTML.
    It is created next to C{txtfile}.
    @raise ConversionError: If none of the candidate encodings can decode the file.
    '''
    # utf8 has to be tried before the 8-bit codecs: iso-8859-1 maps every
    # possible byte and therefore never raises, so anything listed after it
    # (utf8 used to be last) could never be selected.
    candidates = ['utf8', 'iso-8859-1', 'koi8_r', 'koi8_u']
    if iswindows:
        candidates.insert(1, 'cp1252')
    if encoding and encoding not in ['cp1252', 'utf8']:
        candidates.insert(0, encoding)

    txt, enc = None, None
    for candidate in candidates:
        stream = codecs.open(txtfile, 'rb', candidate)
        try:
            try:
                txt = stream.read()
            except UnicodeDecodeError:
                continue
        finally:
            # Always release the handle, whether decoding worked or not
            stream.close()
        enc = candidate
        break
    if txt is None:
        raise ConversionError('Could not detect encoding of %s'%(txtfile,))

    md = markdown.Markdown(txt,
                           extensions=['footnotes', 'tables', 'toc'],
                           encoding=enc,
                           safe_mode=False,
                           )
    html = md.toString().decode(enc)

    # Only the name of the temporary file is needed: close the handle returned
    # by PersistentTemporaryFile and write through a codec aware stream instead.
    p = PersistentTemporaryFile('.html',
                                dir=os.path.dirname(os.path.abspath(txtfile)))
    p.close()
    out = codecs.open(p.name, 'wb', enc)
    try:
        out.write(html)
    finally:
        out.close()
    return p
|
||||
|
||||
def main():
|
||||
try:
|
||||
options, args, parser = parse_options()
|
||||
src = os.path.abspath(os.path.expanduser(args[0]))
|
||||
except:
|
||||
txt = os.path.abspath(os.path.expanduser(args[0]))
|
||||
p = generate_html(txt, options.encoding)
|
||||
for i in range(1, len(sys.argv)):
|
||||
if sys.argv[i] == args[0]:
|
||||
sys.argv.remove(sys.argv[i])
|
||||
break
|
||||
sys.argv.append(p.name)
|
||||
sys.argv.append('--force-page-break-before')
|
||||
sys.argv.append('h2')
|
||||
o_spec = False
|
||||
for arg in sys.argv[1:]:
|
||||
arg = arg.lstrip()
|
||||
if arg.startswith('-o') or arg.startswith('--output'):
|
||||
o_spec = True
|
||||
break
|
||||
ext = '.lrf'
|
||||
for arg in sys.argv[1:]:
|
||||
if arg.strip() == '--lrs':
|
||||
ext = '.lrs'
|
||||
break
|
||||
if not o_spec:
|
||||
sys.argv.append('-o')
|
||||
sys.argv.append(os.path.splitext(os.path.basename(txt))[0]+ext)
|
||||
options, args, parser = html_parse_options(parser=parser)
|
||||
src = args[0]
|
||||
if options.verbose:
|
||||
import warnings
|
||||
warnings.defaultaction = 'error'
|
||||
except Exception, err:
|
||||
print >> sys.stderr, err
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
sys.exit(1)
|
||||
print 'Output written to ', convert_txt(src, options)
|
||||
process_file(src, options)
|
||||
|
||||
|
||||
def convert_txt(path, options):
    """
    Convert the text file at C{path} into an lrf file.

    Consecutive non-blank lines are joined into one paragraph; a blank line
    ends the current paragraph.

    @param path: Path to the text file.
    @param options: Object with the following attributes:
                    C{author}, C{title}, C{encoding} (the assumed encoding of
                    the text in C{path}.) The attributes C{header},
                    C{title_sort}, C{author_sort}, C{publisher}, C{freetext},
                    C{category}, C{profile}, C{output} and C{lrs} are read as well.
    @return: Absolute path of the file that was written.
    @raise ConversionError: If rendering fails with a C{UnicodeDecodeError}.
    """
    import codecs
    header = None
    if options.header:
        header = Paragraph()
        header.append(Bold(options.title))
        header.append(' by ')
        header.append(Italic(options.author))
    title = (options.title, options.title_sort)
    author = (options.author, options.author_sort)
    book = Book(options, header=header, title=title, author=author,
                publisher=options.publisher,
                sourceencoding=options.encoding, freetext=options.freetext,
                category=options.category,
                booksetting=BookSetting(dpi=10*options.profile.dpi,
                                        screenheight=options.profile.screen_height,
                                        screenwidth=options.profile.screen_width))
    pg = book.create_page()
    block = book.create_text_block()
    pg.append(block)
    book.append(pg)

    # Try the requested encoding first and then the fallbacks, in this order.
    fallbacks = [options.encoding, 'cp1252', 'iso-8859-1', 'koi8_r', 'koi8_u',
                 'utf8']
    lines = ""
    for position, enc in enumerate(fallbacks):
        stream = codecs.open(path, 'rb', enc)
        try:
            try:
                lines = stream.readlines()
            except UnicodeDecodeError:
                # A failure of the very last candidate is reported to the caller
                if position == len(fallbacks) - 1:
                    raise
                continue
        finally:
            stream.close()
        break

    pending = ''
    for line in lines:
        line = line.strip()
        if line:
            pending = pending.rstrip() + ' ' + line
        else:
            block.Paragraph(pending)
            pending = ''
    if pending:
        # The file did not end with a blank line: without this the final
        # paragraph would be silently dropped.
        block.Paragraph(pending)

    basename = os.path.basename(path)
    oname = options.output
    if not oname:
        oname = os.path.splitext(basename)[0]+('.lrs' if options.lrs else '.lrf')
    oname = os.path.abspath(os.path.expanduser(oname))
    try:
        if options.lrs:
            book.renderLrs(oname)
        else:
            book.renderLrf(oname)
    except UnicodeDecodeError:
        raise ConversionError(path + ' is not encoded in ' + \
                              options.encoding +'. Specify the '+ \
                              'correct encoding with the -e option.')
    return os.path.abspath(oname)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
5
src/libprs500/ebooks/markdown/__init__.py
Normal file
5
src/libprs500/ebooks/markdown/__init__.py
Normal file
@ -0,0 +1,5 @@
|
||||
''' Package defines lightweight markup language for processing of txt files'''
|
||||
# Initialize extensions
|
||||
from libprs500.ebooks.markdown import mdx_footnotes
|
||||
from libprs500.ebooks.markdown import mdx_tables
|
||||
from libprs500.ebooks.markdown import mdx_toc
|
1671
src/libprs500/ebooks/markdown/markdown.py
Normal file
1671
src/libprs500/ebooks/markdown/markdown.py
Normal file
File diff suppressed because it is too large
Load Diff
255
src/libprs500/ebooks/markdown/mdx_footnotes.py
Normal file
255
src/libprs500/ebooks/markdown/mdx_footnotes.py
Normal file
@ -0,0 +1,255 @@
|
||||
"""
|
||||
## To see this file as plain text go to
|
||||
## http://freewisdom.org/projects/python-markdown/mdx_footnotes.raw_content
|
||||
|
||||
========================= FOOTNOTES =================================
|
||||
|
||||
This section adds footnote handling to markdown. It can be used as
|
||||
an example for extending python-markdown with relatively complex
|
||||
functionality. While in this case the extension is included inside
|
||||
the module itself, it could just as easily be added from outside the
|
||||
module. Note that all markdown classes above are ignorant about
|
||||
footnotes. All footnote functionality is provided separately and
|
||||
then added to the markdown instance at the run time.
|
||||
|
||||
Footnote functionality is attached by calling extendMarkdown()
|
||||
method of FootnoteExtension. The method also registers the
|
||||
extension to allow its state to be reset by a call to reset()
|
||||
method.
|
||||
"""
|
||||
|
||||
FN_BACKLINK_TEXT = "zz1337820767766393qq"
|
||||
|
||||
|
||||
import re, markdown, random
|
||||
|
||||
class FootnoteExtension (markdown.Extension):
    """
    Markdown extension that adds C{[^id]} footnote references and
    C{[^id]: text} footnote definitions.

    The extension keeps two dictionaries: C{footnotes} maps a footnote id to
    its text and C{used_footnotes} maps a footnote id to the number under
    which it is referenced in the document.
    """

    # "[^id]: text", optionally indented by up to three spaces
    DEF_RE = re.compile(r'(\ ?\ ?\ ?)\[\^([^\]]*)\]:\s*(.*)')
    SHORT_USE_RE = re.compile(r'\[\^([^\]]*)\]', re.M) # [^a]

    def __init__ (self, configs=None) :
        """
        @param configs: Iterable of C{(key, value)} pairs overriding the
        defaults in C{self.config}. C{None} means "no overrides".
        """
        self.config = {'PLACE_MARKER' :
                       ["///Footnotes Go Here///",
                        "The text string that marks where the footnotes go"]}

        # makeExtension() defaults configs to None, which is not iterable
        for key, value in configs or () :
            self.config[key][0] = value

        self.reset()

    def extendMarkdown(self, md, md_globals) :
        """
        Hook the footnote machinery into the markdown instance C{md}.

        @param md: The markdown instance to extend.
        @param md_globals: Mapping that provides the C{REFERENCE_PREPROCESSOR}
        and C{IMAGE_REFERENCE_PATTERN} objects used as insertion points.
        """
        self.md = md

        # This extension keeps state, so it is registered to get reset() calls
        md.registerExtension(self)

        # Insert a preprocessor before ReferencePreprocessor
        index = md.preprocessors.index(md_globals['REFERENCE_PREPROCESSOR'])
        preprocessor = FootnotePreprocessor(self)
        preprocessor.md = md
        md.preprocessors.insert(index, preprocessor)

        # Insert an inline pattern before ImageReferencePattern
        FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
        index = md.inlinePatterns.index(md_globals['IMAGE_REFERENCE_PATTERN'])
        md.inlinePatterns.insert(index, FootnotePattern(FOOTNOTE_RE, self))

        # Insert a post-processor that would actually add the footnote div
        postprocessor = FootnotePostprocessor(self)
        postprocessor.extension = self
        md.postprocessors.append(postprocessor)

        # Text post-processor that swaps the backlink marker for its final text
        textPostprocessor = FootnoteTextPostprocessor(self)
        md.textPostprocessors.append(textPostprocessor)

    def reset(self) :
        """Forget all footnotes. May be called by Markdown if a state reset
        is desired."""
        # Random suffix that is appended to every id generated by this instance
        self.footnote_suffix = "-" + str(int(random.random()*1000000000))
        self.used_footnotes = {}
        self.footnotes = {}

    def findFootnotesPlaceholder(self, doc) :
        """
        @returns: the first text node of C{doc} containing the configured
        C{PLACE_MARKER}, or C{None} if there is none.
        """
        def findFootnotePlaceholderFn(node=None, indent=0):
            if node.type == 'text':
                if node.value.find(self.getConfig("PLACE_MARKER")) > -1 :
                    return True

        fn_div_list = doc.find(findFootnotePlaceholderFn)
        if fn_div_list :
            return fn_div_list[0]

    def setFootnote(self, id, text) :
        """Store C{text} as the definition of footnote C{id}."""
        self.footnotes[id] = text

    def makeFootnoteId(self, num) :
        """@returns: the element id of footnote number C{num}."""
        return 'fn%d%s' % (num, self.footnote_suffix)

    def makeFootnoteRefId(self, num) :
        """@returns: the element id of the reference to footnote C{num}."""
        return 'fnr%d%s' % (num, self.footnote_suffix)

    def makeFootnotesDiv (self, doc) :
        """Creates the div with class='footnote' and populates it with
           the text of the footnotes.

           @returns: the footnote div as a dom element, or C{None} when there
           is no referenced footnote to show """

        # A footnote that is defined but never referenced has no number and no
        # reference to link back to; looking it up used to raise KeyError.
        numbered = [(self.used_footnotes[key], key)
                    for key in self.footnotes.keys()
                    if key in self.used_footnotes]
        if not numbered :
            return None
        numbered.sort()

        div = doc.createElement("div")
        div.setAttribute('class', 'footnote')
        hr = doc.createElement("hr")
        div.appendChild(hr)
        ol = doc.createElement("ol")
        div.appendChild(ol)

        for i, key in numbered :
            li = doc.createElement('li')
            li.setAttribute('id', self.makeFootnoteId(i))

            self.md._processSection(li, self.footnotes[key].split("\n"))

            backlink = doc.createElement('a')
            backlink.setAttribute('href', '#' + self.makeFootnoteRefId(i))
            backlink.setAttribute('class', 'footnoteBackLink')
            # Use the real footnote number (this was hard-coded to 1)
            backlink.setAttribute('title',
                                  'Jump back to footnote %d in the text' % i)
            backlink.appendChild(doc.createTextNode(FN_BACKLINK_TEXT))

            if li.childNodes :
                # Put the backlink inside the last child, unless that child is
                # a text node, in which case it goes directly into the <li>
                node = li.childNodes[-1]
                if node.type == "text" :
                    node = li
                node.appendChild(backlink)

            ol.appendChild(li)

        return div
|
||||
|
||||
|
||||
class FootnotePreprocessor :
    """
    Line preprocessor that removes footnote definitions from the text,
    stores them in the footnote extension and numbers the footnote
    references in order of first appearance.
    """

    def __init__ (self, footnotes) :
        """
        @param footnotes: The footnote extension. It provides C{DEF_RE},
        C{SHORT_USE_RE}, C{used_footnotes} and C{setFootnote()}.
        """
        self.footnotes = footnotes

    def run(self, lines) :
        """
        @param lines: a list of lines of text
        @returns: the list of lines with the footnote definitions removed
        """
        self.blockGuru = markdown.BlockGuru()
        lines = self._handleFootnoteDefinitions (lines)

        # Walk over all footnote marks in the text so that we know in what
        # order they are supposed to appear.
        text = "\n".join(lines)
        for match in self.footnotes.SHORT_USE_RE.finditer(text) :
            self.recordFootnoteUse(match)

        return text.split("\n")

    def recordFootnoteUse(self, match) :
        """
        Assign the next free number to the footnote referenced by C{match}.

        A footnote keeps the number of its first reference. Previously every
        further reference overwrote the number, which produced gaps and
        out-of-order numbering.
        """
        key = match.group(1).strip()
        used = self.footnotes.used_footnotes
        if key not in used :
            used[key] = len(used) + 1

    def _handleFootnoteDefinitions(self, lines) :
        """Finds all footnote definitions in the lines and stores them.

           @param lines: a list of lines of text
           @returns: a list of lines representing the text with footnote
               definitions removed """

        # Iterative on purpose: one stack frame per footnote would hit the
        # recursion limit on documents with very many footnotes.
        remaining = lines
        cleaned = []
        while True :
            i, key, footnote = self._findFootnoteDefinition(remaining)
            if key is None :
                cleaned.extend(remaining)
                return cleaned

            # Everything before the definition is plain text; the definition
            # itself is replaced by an empty line.
            cleaned.extend(remaining[:i])
            cleaned.append("")

            # Indented lines following the definition belong to the footnote
            detabbed, remaining = self.blockGuru.detectTabbed(remaining[i+1:])
            self.footnotes.setFootnote(key,
                                       footnote + "\n"
                                       + "\n".join(detabbed))

    def _findFootnoteDefinition(self, lines) :
        """Finds the first line of a footnote definition.

            @param lines: a list of lines of text
            @returns: a tuple (index, id, text) for the first line containing
                a footnote definition, or (len(lines), None, None) """

        for counter, line in enumerate(lines) :
            m = self.footnotes.DEF_RE.match(line)
            if m :
                # Strip the id so that it matches the keys stored by
                # recordFootnoteUse()
                return counter, m.group(2).strip(), m.group(3)
        return len(lines), None, None
|
||||
|
||||
|
||||
class FootnotePattern (markdown.Pattern) :
    """Inline pattern that turns a C{[^id]} reference into a superscript
    link to the corresponding footnote."""

    def __init__ (self, pattern, footnotes) :
        """
        @param pattern: Regular expression matching a footnote reference.
        @param footnotes: The footnote extension; provides C{used_footnotes},
        C{makeFootnoteId()} and C{makeFootnoteRefId()}.
        """
        markdown.Pattern.__init__(self, pattern)
        self.footnotes = footnotes

    def handleMatch(self, m, doc) :
        """
        @returns: a C{<sup>} element containing a link to the footnote
        """
        sup = doc.createElement('sup')
        a = doc.createElement('a')
        sup.appendChild(a)
        # The preprocessor stores the ids stripped, so "[^ 1 ]" has to be
        # looked up as "1".
        # NOTE(review): group(2) assumes markdown.Pattern wraps the expression
        # in a leading capture group -- confirm against markdown.py
        key = m.group(2).strip()
        num = self.footnotes.used_footnotes[key]
        sup.setAttribute('id', self.footnotes.makeFootnoteRefId(num))
        a.setAttribute('href', '#' + self.footnotes.makeFootnoteId(num))
        a.appendChild(doc.createTextNode(str(num)))
        return sup
|
||||
|
||||
class FootnotePostprocessor (markdown.Postprocessor):
    """Postprocessor that inserts the footnote div into the document."""

    def __init__ (self, footnotes) :
        """
        @param footnotes: The footnote extension; provides
        C{makeFootnotesDiv()} and C{findFootnotesPlaceholder()}.
        """
        self.footnotes = footnotes
        # run() reads self.extension. Default it to the extension passed in so
        # that the object also works when nobody assigns the attribute
        # afterwards.
        self.extension = footnotes

    def run(self, doc) :
        """
        Replace the footnote place marker with the footnote div, or append
        the div to the document if no marker is present. Nothing happens
        when no footnote div was produced.
        """
        footnotesDiv = self.footnotes.makeFootnotesDiv(doc)
        if footnotesDiv :
            fnPlaceholder = self.extension.findFootnotesPlaceholder(doc)
            if fnPlaceholder :
                fnPlaceholder.parent.replaceChild(fnPlaceholder, footnotesDiv)
            else :
                doc.documentElement.appendChild(footnotesDiv)
|
||||
|
||||
class FootnoteTextPostprocessor (markdown.Postprocessor):
    """Text postprocessor that replaces the backlink marker
    (FN_BACKLINK_TEXT) in the generated text."""

    def __init__ (self, footnotes) :
        """@param footnotes: The footnote extension."""
        self.footnotes = footnotes

    def run(self, text) :
        """
        @returns: C{text} with every backlink marker replaced by the HTML
        character reference for U+21A9 (leftwards arrow with hook).
        """
        # The reference keeps this module pure ASCII: a literal arrow needs a
        # source encoding declaration and cannot be mixed safely with unicode
        # text when it is a byte string.
        return text.replace(FN_BACKLINK_TEXT, "&#8617;")
|
||||
|
||||
def makeExtension(configs=None) :
    """
    Entry point used to create the footnote extension.

    @param configs: Iterable of C{(key, value)} configuration pairs, or
    C{None} for the defaults.
    """
    # The extension iterates over configs, so None has to become an empty list
    if configs is None :
        configs = []
    return FootnoteExtension(configs=configs)
|
||||
|
65
src/libprs500/ebooks/markdown/mdx_tables.py
Normal file
65
src/libprs500/ebooks/markdown/mdx_tables.py
Normal file
@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
Table extension for Python-Markdown
|
||||
"""
|
||||
|
||||
import markdown
|
||||
|
||||
|
||||
class TablePattern(markdown.Pattern) :
    """Inline pattern that converts a line of the form C{|a|b|c|} into a
    table row."""

    def __init__ (self, md):
        """@param md: The markdown instance; its C{_handleInline()} is used
        to process the text of the cells."""
        markdown.Pattern.__init__(self, r'^\|([^\n]*)\|(\n|$)')
        self.md = md

    def handleMatch(self, m, doc) :
        """
        @returns: a C{<tr>} element with one C{<td>} or C{<th>} per chunk
        """
        # a single line represents a row
        tr = doc.createElement('tr')
        tr.appendChild(doc.createTextNode('\n'))
        # chunks between pipes represent cells
        for t in m.group(2).split('|'):
            if len(t) >= 2 and t.startswith('*') and t.endswith('*'):
                # if a cell is bounded by asterisks, it is a <th>
                td = doc.createElement('th')
                t = t[1:-1]
            else:
                # otherwise it is a <td>
                td = doc.createElement('td')
            # apply inline patterns on chunks
            for n in self.md._handleInline(t):
                # Any kind of string (byte string, unicode or a subclass) is
                # text and has to be wrapped; everything else is appended as
                # it is.
                if isinstance(n, basestring):
                    td.appendChild(doc.createTextNode(n))
                else:
                    td.appendChild(n)
            tr.appendChild(td)
            # very long lines are evil
            tr.appendChild(doc.createTextNode('\n'))
        return tr
|
||||
|
||||
|
||||
class TablePostprocessor:
    """Turns every paragraph whose first real child is a table row into a
    table."""

    def run(self, doc):
        """Rename "p > tr" to "table > tr" in C{doc}.

        markdown wraps the generated <tr> elements in a <p>; only the first
        child that is not whitespace-only text decides whether a paragraph
        is renamed.
        """
        def is_paragraph(candidate):
            return candidate.type == 'element' and candidate.nodeName == 'p'

        for paragraph in doc.find(is_paragraph):
            first = None
            for child in paragraph.childNodes:
                blank = child.type == 'text' and child.value.strip() == ''
                if not blank:
                    # leading whitespace is over, this child decides
                    first = child
                    break
            if first is None:
                continue
            if first.type == 'element' and first.nodeName == 'tr':
                paragraph.nodeName = 'table'
|
||||
|
||||
|
||||
class TableExtension(markdown.Extension):
    """Markdown extension that adds simple pipe-delimited tables."""

    def extendMarkdown(self, md, md_globals):
        """Register the row pattern as the first inline pattern of C{md} and
        add the postprocessor that turns the wrapping <p> into a <table>."""
        row_pattern = TablePattern(md)
        md.inlinePatterns.insert(0, row_pattern)

        paragraph_fixer = TablePostprocessor()
        md.postprocessors.append(paragraph_fixer)
|
||||
|
||||
|
||||
def makeExtension(configs):
    """Entry point used to create the table extension; C{configs} is handed
    to the extension unchanged."""
    extension = TableExtension(configs)
    return extension
|
||||
|
||||
|
165
src/libprs500/ebooks/markdown/mdx_toc.py
Normal file
165
src/libprs500/ebooks/markdown/mdx_toc.py
Normal file
@ -0,0 +1,165 @@
|
||||
## To access this file as plain text go to
|
||||
## http://freewisdom.org/projects/python-markdown/mdx_toc.raw_content
|
||||
|
||||
"""
|
||||
Chris Clark - clach04 -at- sf.net
|
||||
|
||||
My markdown extensions for adding:
|
||||
Table of Contents (aka toc)
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import markdown
|
||||
|
||||
DEFAULT_TITLE = None
|
||||
|
||||
def extract_alphanumeric(in_str=None):
    """Title-case C{in_str} and return only its alphanumeric characters,
    joined into a single string.
    """
    titled = in_str.title()
    return ''.join(char for char in titled if char.isalnum())
|
||||
|
||||
class TitlePostprocessor (markdown.Postprocessor):
    """Postprocessor that puts the title element created by its extension at
    the start of the document."""

    def __init__ (self, extension) :
        # The extension is expected to provide createTitle(doc)
        self.extension = extension

    def run(self, doc) :
        """Insert the title element, if the extension produced one, as the
        first child of the document element."""
        title = self.extension.createTitle(doc)
        if not title :
            return
        doc.documentElement.insertChild(0, title)
|
||||
|
||||
|
||||
class TocExtension (markdown.Extension):
    """Markdown extension: generate a Table Of Contents (aka toc)
    toc is returned in a div tag with class='toc'
    toc is either:
        appended to end of document
      OR
        replaces first string occurrence of "///Table of Contents///"
    """

    def __init__ (self) :
        #maybe add these as parameters to the class init?
        self.TOC_INCLUDE_MARKER = "///Table of Contents///"
        self.TOC_TITLE = "Table Of Contents"
        # Level of the headings that get an entry in the toc (2 means <h2>)
        self.auto_toc_heading_type=2
        # Level of the heading used for the title of the toc itself
        self.toc_heading_type=3

    def extendMarkdown(self, md, md_globals) :
        """Append the toc postprocessor to the markdown instance C{md}."""
        # Just insert in the end
        md.postprocessors.append(TocPostprocessor(self))
        # Stateless extensions do not need to be registered, so we don't
        # register.

    def findTocPlaceholder(self, doc) :
        """
        @returns: the first text node of C{doc} containing
        C{TOC_INCLUDE_MARKER}, or C{None} if there is none.
        """
        def findTocPlaceholderFn(node=None, indent=0):
            if node.type == 'text':
                if node.value.find(self.TOC_INCLUDE_MARKER) > -1 :
                    return True

        toc_div_list = doc.find(findTocPlaceholderFn)
        if toc_div_list :
            return toc_div_list[0]

    def createTocDiv(self, doc) :
        """
        Creates Table Of Contents based on headers.

        An anchor is added to every heading that starts with non-blank text.
        The toc itself lists only the headings of level
        C{auto_toc_heading_type}.

        @returns: toc as a single dom element in a <div> tag with
                  class='toc', or C{None} if there is nothing to list
        """

        # Find headers
        headers_compiled_re = re.compile("h[123456]", re.IGNORECASE)
        def findHeadersFn(element=None):
            if element.type=='element':
                if headers_compiled_re.match(element.nodeName):
                    return True

        headers_doc_list = doc.find(findHeadersFn)

        # Insert anchor tags into dom
        # NOTE(review): the extracted source has lost its indentation, so it
        # is unclear whether anchors were added to every heading or only to
        # those of the toc level -- confirm. Every heading is anchored here
        # because the list building below filters by level a second time.
        generated_anchor_id=0
        headers_list=[]
        for element in headers_doc_list:
            if not element.childNodes:
                # Empty heading: nothing to use as a title
                continue
            # The title is the text of the first child. A heading that starts
            # with markup has no such text and is skipped instead of crashing.
            heading_title = getattr(element.childNodes[0], 'value', None)
            if not heading_title or heading_title.strip() == "":
                continue
            heading_type = int(element.nodeName[-1:])

            html_anchor_name= (extract_alphanumeric(heading_title)
                               +'__MD_autoTOC_%d' % (generated_anchor_id))

            # insert anchor tag inside header tags
            html_anchor = doc.createElement("a")
            html_anchor.setAttribute('name', html_anchor_name)
            element.appendChild(html_anchor)

            headers_list.append( (heading_type, heading_title,
                                  html_anchor_name) )
            generated_anchor_id = generated_anchor_id + 1

        # Only headings of the configured level are listed
        toc_entries = [(title, anchor)
                       for (level, title, anchor) in headers_list
                       if level == self.auto_toc_heading_type]
        if not toc_entries:
            # Do not emit a titled but empty toc
            return None

        # create dom for toc
        toc_doc_list = doc.createElement("ul")
        for (heading_title, html_anchor_name) in toc_entries:
            toc_doc_entry = doc.createElement("li")
            toc_doc_link = doc.createElement("a")
            toc_doc_link.setAttribute('href', '#'+html_anchor_name)
            toc_doc_text = doc.createTextNode(heading_title)
            toc_doc_link.appendChild(toc_doc_text)
            toc_doc_entry.appendChild(toc_doc_link)
            toc_doc_list.appendChild(toc_doc_entry)

        # Put list into div
        div = doc.createElement("div")
        div.setAttribute('class', 'toc')
        if self.TOC_TITLE:
            toc_header = doc.createElement("h%d"%(self.toc_heading_type) )
            toc_header_text = doc.createTextNode(self.TOC_TITLE)
            toc_header.appendChild(toc_header_text)
            div.appendChild(toc_header)
        div.appendChild(toc_doc_list)

        return div
|
||||
|
||||
|
||||
class TocPostprocessor (markdown.Postprocessor):
    """Postprocessor that puts the table of contents into the document."""

    def __init__ (self, toc) :
        # The toc extension: provides findTocPlaceholder() and createTocDiv()
        self.toc = toc

    def run(self, doc):
        """Insert the toc div in place of the "magic" marker or, when the
        document has no marker, at the end of the DOM."""
        # The marker is looked up before the toc is built
        marker = self.toc.findTocPlaceholder(doc)
        toc_div = self.toc.createTocDiv(doc)
        if not toc_div:
            return

        if marker :
            marker.parent.replaceChild(marker, toc_div)
            return

        # No marker: dump at the end of the DOM.
        # Probably want to use CSS to position div
        doc.documentElement.appendChild(toc_div)
|
||||
|
||||
|
||||
def makeExtension(configs=None) :
    """Entry point used to create the toc extension. C{configs} is accepted
    but not used."""
    extension = TocExtension()
    return extension
|
@ -47,7 +47,7 @@ class _TemporaryFileWrapper(object):
|
||||
os.remove(self.name)
|
||||
|
||||
|
||||
def PersistentTemporaryFile(suffix="", prefix=""):
|
||||
def PersistentTemporaryFile(suffix="", prefix="", dir=None):
|
||||
"""
|
||||
Return a temporary file that is available even after being closed on
|
||||
all platforms. It is automatically deleted when this object is deleted.
|
||||
@ -55,6 +55,7 @@ def PersistentTemporaryFile(suffix="", prefix=""):
|
||||
"""
|
||||
if prefix == None:
|
||||
prefix = ""
|
||||
fd, name = tempfile.mkstemp(suffix, "libprs500_"+ __version__+"_" + prefix)
|
||||
fd, name = tempfile.mkstemp(suffix, "libprs500_"+ __version__+"_" + prefix,
|
||||
dir=dir)
|
||||
_file = os.fdopen(fd, "wb")
|
||||
return _TemporaryFileWrapper(_file, name)
|
||||
|
@ -9,6 +9,7 @@ PREFIX = "/var/www/vhosts/kovidgoyal.net/subdomains/libprs500"
|
||||
DOWNLOADS = PREFIX+"/httpdocs/downloads"
|
||||
DOCS = PREFIX+"/httpdocs/apidocs"
|
||||
HTML2LRF = "src/libprs500/ebooks/lrf/html/demo"
|
||||
TXT2LRF = "src/libprs500/ebooks/lrf/txt/demo"
|
||||
check_call = partial(_check_call, shell=True)
|
||||
h = Host(hostType=VIX_SERVICEPROVIDER_VMWARE_WORKSTATION)
|
||||
|
||||
@ -19,7 +20,7 @@ def build_windows():
|
||||
|
||||
|
||||
|
||||
vm = h.openVM('/mnt/extra/vmware/Windows Vista/Windows Vista.vmx')
|
||||
vm = h.openVM('/mnt/backup/vmware/Windows Vista/Windows Vista.vmx')
|
||||
vm.powerOn()
|
||||
if not vm.waitForToolsInGuest():
|
||||
print >>sys.stderr, 'Windows is not booting up'
|
||||
@ -43,7 +44,7 @@ def build_osx():
|
||||
if os.path.exists('dist/dmgdone'):
|
||||
os.unlink('dist/dmgdone')
|
||||
|
||||
vm = h.openVM('/mnt/extra/vmware/Mac OSX/Mac OSX.vmx')
|
||||
vm = h.openVM('/mnt/backup/vmware/Mac OSX/Mac OSX.vmx')
|
||||
vm.powerOn()
|
||||
c = 25 * 60
|
||||
print 'Waiting (minutes):',
|
||||
@ -69,6 +70,8 @@ def upload_demo():
|
||||
f.close()
|
||||
check_call('''html2lrf --title='Demonstration of html2lrf' --author='Kovid Goyal' --header --output=/tmp/html2lrf.lrf %s/demo.html'''%(HTML2LRF,))
|
||||
check_call('''scp /tmp/html2lrf.lrf castalia:%s/'''%(DOWNLOADS,))
|
||||
check_call('''txt2lrf -t 'Demonstration of txt2lrf' -a 'Kovid Goyal' --header -o /tmp/txt2lrf.lrf %s/demo.txt'''%(TXT2LRF,) )
|
||||
check_call('''scp /tmp/txt2lrf.lrf castalia:%s/'''%(DOWNLOADS,))
|
||||
|
||||
def upload_installers(exe, dmg):
|
||||
check_call('''ssh castalia rm -f %s/libprs500\*.exe'''%(DOWNLOADS,))
|
||||
|
Loading…
x
Reference in New Issue
Block a user