Various improvements to html2lrf. Added a demo for html2lrf

This commit is contained in:
Kovid Goyal 2007-04-26 04:56:51 +00:00
parent f25cc305a1
commit 42c4acd360
10 changed files with 307 additions and 86 deletions

View File

@ -5,6 +5,5 @@
<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property> <pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.5</pydev_property>
<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH"> <pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
<path>/libprs500/src</path> <path>/libprs500/src</path>
<path>/libprs500/libprs500.lrf.txt</path>
</pydev_pathproperty> </pydev_pathproperty>
</pydev_project> </pydev_project>

View File

@ -33,7 +33,7 @@ You may have to adjust the GROUP and the location of the rules file to
suit your distribution. suit your distribution.
""" """
__version__ = "0.3.13" __version__ = "0.3.14"
__docformat__ = "epytext" __docformat__ = "epytext"
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"

View File

@ -29,6 +29,16 @@ __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
class ConversionError(Exception): class ConversionError(Exception):
pass pass
def get_text(elem):
    '''
    Return the textual content of a pylrs element.

    The element's own C{text} attribute (if it has one) comes first, followed
    by the text of every item in its C{contents} attribute (if it has one),
    gathered recursively in order. Objects with neither attribute contribute
    the empty string.

    @param elem: The element to extract text from
    @return: The concatenated text, C{''} if there is none
    '''
    parts = []
    # A C{text} attribute that exists but is unset (None) is treated as "no
    # text" rather than raising a TypeError during concatenation.
    text = getattr(elem, 'text', None)
    if text is not None:
        parts.append(text)
    # Likewise tolerate a C{contents} attribute that is None or empty.
    for child in getattr(elem, 'contents', None) or ():
        parts.append(get_text(child))
    # Join once instead of growing a string with repeated +=
    return ''.join(parts)
def option_parser(usage): def option_parser(usage):
parser = OptionParser(usage=usage, version='libprs500 '+VERSION) parser = OptionParser(usage=usage, version='libprs500 '+VERSION)
parser.add_option('--header', action='store_true', default=False, dest='header', parser.add_option('--header', action='store_true', default=False, dest='header',

View File

@ -14,14 +14,13 @@
## You should have received a copy of the GNU General Public License along ## You should have received a copy of the GNU General Public License along
## with this program; if not, write to the Free Software Foundation, Inc., ## with this program; if not, write to the Free Software Foundation, Inc.,
## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
""" """
Code to convert HTML ebooks into LRF ebooks. Code to convert HTML ebooks into LRF ebooks.
I am indebted to esperanc for the CSS->Xylog Style conversion routines I am indebted to esperanc for the CSS->Xylog Style conversion routines
and to Falstaff for pylrs. and to Falstaff for pylrs.
""" """
import os, re, sys, shutil import os, re, sys, shutil, traceback
from htmlentitydefs import name2codepoint from htmlentitydefs import name2codepoint
from urllib import urlopen from urllib import urlopen
from urlparse import urlparse from urlparse import urlparse
@ -31,9 +30,10 @@ from operator import itemgetter
from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \ from libprs500.lrf.html.BeautifulSoup import BeautifulSoup, Comment, Tag, \
NavigableString, Declaration NavigableString, Declaration
from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \ from libprs500.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream, TextBlock, \
ImageBlock, JumpButton, CharButton, Page, Bold ImageBlock, JumpButton, CharButton, \
Page, Bold, Space, Plot, TextStyle, Image
from libprs500.lrf.pylrs.pylrs import Span as _Span from libprs500.lrf.pylrs.pylrs import Span as _Span
from libprs500.lrf import ConversionError, option_parser, Book from libprs500.lrf import ConversionError, option_parser, Book, get_text
from libprs500 import extract from libprs500 import extract
def ImagePage(): def ImagePage():
@ -155,6 +155,8 @@ class Span(_Span):
ans = font_weight(val) ans = font_weight(val)
if ans: if ans:
t['fontweight'] = ans t['fontweight'] = ans
if ans > 140:
t['wordspace'] = '50'
elif key.startswith("margin"): elif key.startswith("margin"):
if key == "margin": if key == "margin":
u = [] u = []
@ -181,19 +183,12 @@ class Span(_Span):
t["topskip"] = str(u[0]) t["topskip"] = str(u[0])
if u[1] is not None: if u[1] is not None:
t["sidemargin"] = str(u[1]) t["sidemargin"] = str(u[1])
elif key == "text-align" or key == "align":
if val in ["right", "foot"]:
t["align"] = "foot"
elif val == "center":
t["align"] = "center"
else:
t["align"] = "head"
else: else:
print >>sys.stderr, 'Unhandled/malformed CSS key:', key, d[key] print >>sys.stderr, 'Unhandled/malformed CSS key:', key, d[key]
return t return t
def __init__(self, ns, css, font_delta=0): def __init__(self, ns, css, font_delta=0):
src = ns.string src = ns.string if hasattr(ns, 'string') else str(ns)
src = re.sub(r'\s{2,}', ' ', src) # Remove multiple spaces src = re.sub(r'\s{2,}', ' ', src) # Remove multiple spaces
for pat, repl in Span.rules: for pat, repl in Span.rules:
src = pat.sub(repl, src) src = pat.sub(repl, src)
@ -228,11 +223,35 @@ class HTMLConverter(object):
i = {"font-style" :"italic"}, i = {"font-style" :"italic"},
em = {"font-style" :"italic"}, em = {"font-style" :"italic"},
small = {'font-size' :'small'}, small = {'font-size' :'small'},
pre = {'font-family' :'monospace' },
center = {'text-align' : 'center'} center = {'text-align' : 'center'}
) )
processed_files = {} #: Files that have been processed processed_files = {} #: Files that have been processed
def __init__(self, book, path, font_delta=0, verbose=False, cover=None): def __init__(self, book, path, width=575, height=747,
font_delta=0, verbose=False, cover=None):
'''
Convert HTML file at C{path} and add it to C{book}. After creating
the object, you must call L{self.process_links} on it to create the links and
then L{self.writeto} to output the LRF/S file.
@param book: The LRF book
@type book: L{libprs500.lrf.pylrs.Book}
@param path: path to the HTML file to process
@type path: C{str}
@param width: Width of the device on which the LRF file is to be read
@type width: C{int}
@param height: Height of the device on which the LRF file is to be read
@type height: C{int}
@param font_delta: The amount in pts by which all fonts should be changed
@type font_delta: C{int}
@param verbose: Whether processing should be verbose or not
@type verbose: C{bool}
@param cover: Path to an image to use as the cover of this book
@type cover: C{str}
'''
self.page_width = width #: The width of the page
self.page_height = height #: The height of the page
self.images = {} #: Images referenced in the HTML document self.images = {} #: Images referenced in the HTML document
self.targets = {} #: <a name=...> elements self.targets = {} #: <a name=...> elements
self.links = [] #: <a href=...> elements self.links = [] #: <a href=...> elements
@ -240,6 +259,7 @@ class HTMLConverter(object):
self.links_processed = False #: Whether links_processed has been called on this object self.links_processed = False #: Whether links_processed has been called on this object
self.font_delta = font_delta self.font_delta = font_delta
self.cover = cover self.cover = cover
self.in_ol = False #: Flag indicating we're in an <ol> element
self.book = book #: The Book object representing a BBeB book self.book = book #: The Book object representing a BBeB book
path = os.path.abspath(path) path = os.path.abspath(path)
os.chdir(os.path.dirname(path)) os.chdir(os.path.dirname(path))
@ -301,7 +321,8 @@ class HTMLConverter(object):
def merge_parent_css(prop, pcss): def merge_parent_css(prop, pcss):
temp = {} temp = {}
for key in pcss.keys(): for key in pcss.keys():
if key.lower().startswith('font'): chk = key.lower()
if chk.startswith('font') or chk == 'text-align':
temp[key] = pcss[key] temp[key] = pcss[key]
prop.update(temp) prop.update(temp)
@ -330,11 +351,11 @@ class HTMLConverter(object):
self.top = self.current_block self.top = self.current_block
self.process_children(self.soup, {}) self.process_children(self.soup, {})
if self.current_para: if self.current_para and get_text(self.current_para).strip():
self.current_block.append(self.current_para) self.current_block.append(self.current_para)
if self.current_block: if self.current_block and get_text(self.current_block).strip():
self.current_page.append(self.current_block) self.current_page.append(self.current_block)
if self.current_page: if self.current_page and get_text(self.current_page).strip():
self.book.append(self.current_page) self.book.append(self.current_page)
@ -356,7 +377,7 @@ class HTMLConverter(object):
cwd = os.getcwd() cwd = os.getcwd()
for link in self.links: for link in self.links:
purl = urlparse(link.tag['href']) purl = urlparse(link.tag['href'])
if purl[1]: # Not a local link if purl[1]: # Not a link to a file on the local filesystem
continue continue
path, fragment = purl[2], purl[5] path, fragment = purl[2], purl[5]
para, tag = link.para, link.tag para, tag = link.para, link.tag
@ -366,6 +387,7 @@ class HTMLConverter(object):
jb = JumpButton(tb) jb = JumpButton(tb)
self.book.append(jb) self.book.append(jb)
cb = CharButton(jb, text=self.get_text(tag)) cb = CharButton(jb, text=self.get_text(tag))
para.contents = []
para.append(cb) para.append(cb)
else: else:
if not os.access(path, os.R_OK): if not os.access(path, os.R_OK):
@ -378,7 +400,9 @@ class HTMLConverter(object):
self.files[path] = HTMLConverter(self.book, path, \ self.files[path] = HTMLConverter(self.book, path, \
font_delta=self.font_delta, verbose=self.verbose) font_delta=self.font_delta, verbose=self.verbose)
HTMLConverter.processed_files[path] = self.files[path] HTMLConverter.processed_files[path] = self.files[path]
except: except Exception, e:
print >>sys.stderr, 'Unable to process', path
traceback.print_exc()
continue continue
finally: finally:
os.chdir(cwd) os.chdir(cwd)
@ -392,6 +416,7 @@ class HTMLConverter(object):
jb = JumpButton(tb) jb = JumpButton(tb)
self.book.append(jb) self.book.append(jb)
cb = CharButton(jb, text=self.get_text(tag)) cb = CharButton(jb, text=self.get_text(tag))
para.contents = []
para.append(cb) para.append(cb)
self.links_processed = True self.links_processed = True
@ -411,13 +436,13 @@ class HTMLConverter(object):
End the current page, ensuring that any further content is displayed End the current page, ensuring that any further content is displayed
on a new page. on a new page.
""" """
if self.current_para.contents: if get_text(self.current_para).strip():
self.current_block.append(self.current_para) self.current_block.append(self.current_para)
self.current_para = Paragraph() self.current_para = Paragraph()
if self.current_block.contents: if get_text(self.current_block).strip():
self.current_page.append(self.current_block) self.current_page.append(self.current_block)
self.current_block = TextBlock() self.current_block = TextBlock()
if self.current_page.contents: if get_text(self.current_page).strip():
self.book.append(self.current_page) self.book.append(self.current_page)
self.current_page = Page() self.current_page = Page()
@ -442,8 +467,33 @@ class HTMLConverter(object):
self.add_text(c, pcss) self.add_text(c, pcss)
def add_text(self, tag, css): def add_text(self, tag, css):
'''
Add text to the current paragraph taking CSS into account.
@param tag: Either a BeautifulSoup tag or a string
@param css:
@type css:
'''
src = tag.string if hasattr(tag, 'string') else str(tag)
if not src.strip():
self.current_para.append(' ')
else:
align = 'head'
if css.has_key('text-align'):
val = css['text-align']
if val in ["right", "foot"]:
align = "foot"
elif val == "center":
align = "center"
css.pop('text-align')
if align != self.current_block.textStyle.attrs['align']:
if get_text(self.current_para).strip():
self.current_block.append(self.current_para)
if get_text(self.current_block).strip():
self.current_page.append(self.current_block)
self.current_block = TextBlock(TextStyle(align=align))
self.current_para = Paragraph()
try: try:
self.current_para.append(Span(tag, self.sanctify_css(css), \ self.current_para.append(Span(src, self.sanctify_css(css), \
font_delta=self.font_delta)) font_delta=self.font_delta))
except ConversionError, err: except ConversionError, err:
if self.verbose: if self.verbose:
@ -461,22 +511,21 @@ class HTMLConverter(object):
css.pop(key) css.pop(key)
return css return css
def end_current_para(self):
'''
End the current paragraph with a paragraph break after it. If the current
paragraph has no non-whitespace text in it, do nothing.

Otherwise the paragraph, if it has any contents, is appended to
C{self.current_block} and C{self.current_para} is reset to a fresh
L{Paragraph}; a L{CR} is then appended to the block unless the block is
empty or already ends in one.
'''
# NOTE(review): indentation was lost in this view of the file, so the exact
# nesting of the statements below should be confirmed against the real source.
# Nothing but whitespace accumulated so far: leave paragraph and block untouched
if not get_text(self.current_para).strip():
return
if self.current_para.contents:
self.current_block.append(self.current_para)
self.current_para = Paragraph()
# Only add a break when the block has content and does not already end in a CR
if self.current_block.contents and \
not isinstance(self.current_block.contents[-1], CR):
self.current_block.append(CR())
def parse_tag(self, tag, parent_css): def parse_tag(self, tag, parent_css):
def process_text_tag(tag, tag_css):
if 'page-break-before' in tag_css.keys():
if tag_css['page-break-before'].lower() != 'avoid':
self.end_page()
tag_css.pop('page-break-before')
end_page = False
if 'page-break-after' in tag_css.keys():
end_page = True
tag_css.pop('page-break-after')
self.process_children(tag, tag_css)
if end_page:
self.end_page()
try: try:
tagname = tag.name.lower() tagname = tag.name.lower()
except AttributeError: except AttributeError:
@ -488,17 +537,47 @@ class HTMLConverter(object):
return return
except KeyError: except KeyError:
pass pass
if 'page-break-before' in tag_css.keys():
if tag_css['page-break-before'].lower() != 'avoid':
self.end_page()
tag_css.pop('page-break-before')
end_page = False
if 'page-break-after' in tag_css.keys() and \
tag_css['page-break-after'].lower() != 'avoid':
end_page = True
tag_css.pop('page-break-after')
if tagname in ["title", "script", "meta", 'del']: if tagname in ["title", "script", "meta", 'del']:
pass pass
elif tagname == 'a': elif tagname == 'a':
if tag.has_key('name'): if tag.has_key('name'):
if get_text(self.current_para).strip():
self.current_block.append(self.current_para) self.current_block.append(self.current_para)
if get_text(self.current_block).strip():
self.current_page.append(self.current_block) self.current_page.append(self.current_block)
previous = self.current_block
tb = TextBlock() tb = TextBlock()
self.current_block = tb self.current_block = tb
self.current_para = Paragraph() self.current_para = Paragraph()
self.targets[tag['name']] = tb self.targets[tag['name']] = tb
self.process_children(tag, tag_css) self.process_children(tag, tag_css)
if tb.parent == None:
if self.current_block == tb:
if get_text(self.current_para):
self.current_block.append(self.current_para)
self.current_para = Paragraph()
self.current_page.append(self.current_block)
self.current_block = TextBlock()
else:
found, marked = False, False
for item in self.current_page.contents:
if item == previous:
found = True
if found and isinstance(item, TextBlock):
self.targets[tag['name']] = item
marked = True
if not marked:
self.current_page.append(tb)
elif tag.has_key('href'): elif tag.has_key('href'):
purl = urlparse(tag['href']) purl = urlparse(tag['href'])
path = purl[2] path = purl[2]
@ -506,19 +585,18 @@ class HTMLConverter(object):
['png', 'jpg', 'bmp', 'jpeg']: ['png', 'jpg', 'bmp', 'jpeg']:
self.add_image_page(path) self.add_image_page(path)
else: else:
span = _Span() self.add_text('Link: '+tag['href'], tag_css)
self.current_para.append(span) self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
self.links.append(HTMLConverter.Link(span, tag))
elif tagname == 'img': elif tagname == 'img':
if tag.has_key('src') and os.access(tag['src'], os.R_OK): if tag.has_key('src') and os.access(tag['src'], os.R_OK):
width, height = 600, 800 width, height = self.page_width, self.page_height
try: try:
try: try:
from PIL import Image from PIL import Image as PILImage
except: except:
pass pass
else: else:
im = Image.open(tag['src']) im = PILImage.open(tag['src'])
width, height = im.size width, height = im.size
if tag.has_key('width'): if tag.has_key('width'):
width = int(tag['width']) width = int(tag['width'])
@ -526,13 +604,23 @@ class HTMLConverter(object):
height = int(tag['height']) height = int(tag['height'])
except: except:
pass pass
path = os.path.abspath(tag['src'])
if not self.images.has_key(path):
self.images[path] = ImageStream(path)
if max(width, height) <= min(self.page_width, self.page_height)/5.:
im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
xsize=width, ysize=height)
self.current_para.append(Plot(im, xsize=width*10, ysize=width*10))
elif max(width, height) <= min(self.page_width, self.page_height)/2.:
self.end_current_para()
im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
xsize=width, ysize=height)
self.current_para.append(Plot(im, xsize=width*10, ysize=width*10))
else:
self.current_block.append(self.current_para) self.current_block.append(self.current_para)
self.current_page.append(self.current_block) self.current_page.append(self.current_block)
self.current_para = Paragraph() self.current_para = Paragraph()
self.current_block = TextBlock() self.current_block = TextBlock()
path = os.path.abspath(tag['src'])
if not self.images.has_key(path):
self.images[path] = ImageStream(path)
im = ImageBlock(self.images[path], x1=width, y1=height, im = ImageBlock(self.images[path], x1=width, y1=height,
xsize=width, ysize=height) xsize=width, ysize=height)
self.current_page.append(im) self.current_page.append(im)
@ -557,31 +645,73 @@ class HTMLConverter(object):
f.close() f.close()
except IOError: except IOError:
pass pass
elif tagname in ['p', 'div', 'ul', 'ol', 'tr', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']: elif tagname == 'pre':
# TODO: Implement ol self.end_current_para()
indent = tag_css.pop('text-indent', '') src = ''.join([str(i) for i in tag.contents])
if indent: lines = src.split('\n')
# TODO: If indent is different from current textblock's parindent for line in lines:
# start a new TextBlock try:
self.current_para.append(Span(line, tag_css))
except ConversionError:
pass pass
self.current_para.CR() # Put a paragraph end self.current_para.CR()
elif tagname in ['ul', 'ol']:
self.in_ol = 1 if tagname == 'ol' else 0
self.end_current_para()
self.process_children(tag, tag_css)
self.in_ol = 0
self.end_current_para()
elif tagname == 'li':
prepend = str(self.in_ol)+'. ' if self.in_ol else u'\u2022' + ' '
if get_text(self.current_para).strip():
self.current_para.append(CR())
self.current_block.append(self.current_para) self.current_block.append(self.current_para)
self.current_para = Paragraph() self.current_para = Paragraph()
process_text_tag(tag, tag_css) self.current_para.append(Space(xsize=100))
self.current_para.append(prepend)
self.process_children(tag, tag_css)
if self.in_ol:
self.in_ol += 1
elif tagname in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
self.end_current_para()
if self.current_block.contents:
self.current_block.append(CR())
self.process_children(tag, tag_css)
self.end_current_para()
self.current_block.append(CR())
elif tagname in ['p', 'div']:
# TODO: Implement ol
#indent = tag_css.pop('text-indent', '')
#if indent:
# TODO: If indent is different from current textblock's parindent
# start a new TextBlock
#pass
self.end_current_para()
self.process_children(tag, tag_css)
self.end_current_para()
elif tagname in ['b', 'strong', 'i', 'em', 'span']: elif tagname in ['b', 'strong', 'i', 'em', 'span']:
process_text_tag(tag, tag_css) self.process_children(tag, tag_css)
elif tagname == 'font': elif tagname == 'font':
if tag.has_key('face'): if tag.has_key('face'):
tag_css['font-family'] = tag['face'] tag_css['font-family'] = tag['face']
process_text_tag(tag, tag_css) self.process_children(tag, tag_css)
elif tagname == 'br': elif tagname in ['br', 'tr']:
self.current_para.append(CR()) self.current_para.append(CR())
self.process_children(tag, tag_css)
elif tagname == 'hr': elif tagname == 'hr':
self.current_para.append(CR()) if self.current_para.contents:
# TODO: Horizontal line? self.current_block.append(self.current_para)
self.current_para = Paragraph()
self.current_block.append(CR())
self.current_page.append(self.current_block)
self.current_block = TextBlock()
self.current_page.RuledLine(linelength=self.page_width)
else: else:
self.process_children(tag, tag_css) self.process_children(tag, tag_css)
if end_page:
self.end_page()
def writeto(self, path, lrs=False): def writeto(self, path, lrs=False):
self.book.renderLrs(path) if lrs else self.book.renderLrf(path) self.book.renderLrs(path) if lrs else self.book.renderLrf(path)
@ -632,6 +762,7 @@ def process_file(path, options):
suffix = '.lrs' if options.lrs else '.lrf' suffix = '.lrs' if options.lrs else '.lrf'
name = os.path.splitext(os.path.basename(path))[0] + suffix name = os.path.splitext(os.path.basename(path))[0] + suffix
oname = os.path.join(cwd,name) oname = os.path.join(cwd,name)
oname = os.path.abspath(os.path.expanduser(oname))
conv.writeto(oname, lrs=options.lrs) conv.writeto(oname, lrs=options.lrs)
print 'Output written to', oname print 'Output written to', oname
finally: finally:
@ -692,7 +823,7 @@ def console_query(dirpath, candidate, docs):
def get_path(path, query=console_query): def get_path(path, query=console_query):
path = os.path.abspath(path) path = os.path.abspath(os.path.expanduser(path))
ext = os.path.splitext(path)[1][1:].lower() ext = os.path.splitext(path)[1][1:].lower()
if ext in ['htm', 'html', 'xhtml']: if ext in ['htm', 'html', 'xhtml']:
return None, path return None, path

View File

@ -0,0 +1,73 @@
<html>
<h1>Demo of <span style='font-family:monospace'>html2lrf</span></h1>
<p>
This file contains a demonstration of the capabilities of <span style='font-family:monospace'>html2lrf,</span> the HTML to LRF converter from <em>libprs500.</em> To obtain libprs500 visit <span style='font:sans-serif'>https://libprs500.kovidgoyal.net</span>
</p>
<h2><a name='toc'>Table of Contents</a></h2>
<ul style='page-break-after:always'>
<li><a href='#lists'>Demonstration of Lists</a></li>
<li><a href='#text'>Text formatting and ruled lines</a></li>
<li><a href='#images'>Inline images</a></li>
<li><a href='#recursive'>Recursive link following</a></li>
<li><a href='demo_ext.html'>The HTML used to create this file</a></li>
</ul>
<h2><a name='lists'>Lists</a></h2>
<p><h3>Unordered lists</h3>
<ul>
<li>Item 1</li>
<li>Item 2</li>
</ul>
</p>
<p><h3>Ordered lists</h3>
<ol>
<li>Item 1</li>
<li>Item 2</li>
</ol>
</p>
<br/>
<p>
Note that nested lists are not supported.
</p>
<p style='page-break-after:always'>
<hr />
<a href='#toc'>Table of Contents</a>
</p>
<h2><a name='text'>Text formatting</a></h2>
<p>
A simple <i>paragraph</i> of <b>formatted
<i>text</i></b> with a ruled line following it.
</p>
<hr/>
<p> A
<span style='font-style:italic'>similar</span>
paragraph, but now using
<span style='font-weight:bold'>CSS</span>
to perform the text formatting.</p>
<hr/>
<center>A centered phrase</center>
<span style='text-align:right'>A right aligned phrase</span>
A normal phrase
<p style='page-break-after:always'>
<hr />
<a href='#toc'>Table of Contents</a>
</p>
<h2 style='page-break-before:always'><a name='images'>Inline images</a></h2>
<p>
Here I demonstrate the use of inline images in the midst of text. Here is a small image <img src='small.jpg' /> embedded in a sentence. Now we have a slightly larger image that is automatically put in its own block <img src='medium.jpg' /> and finally we have a large image which is automatically placed on a page by itself and prevented from being autoscaled when the user changes from S to M to L. Try changing sizes and see how the different embedding styles behave. <img src='large.jpg' />
</p>
<p style='page-break-after:always'>
<hr />
<a href='#toc'>Table of Contents</a>
</p>
<h2 style='page-break-before:always'><a name='recursive'>Recursive link following</a></h2>
<span style='font:monospace'>html2lrf</span> follows links in HTML files that point to other files, recursively. Thus it can be used to convert a whole tree of HTML files into a single LRF file.
<p style='page-break-after:always'>
<hr />
<a href='#toc'>Table of Contents</a>
</p>
</html>

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.0 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

View File

@ -40,7 +40,7 @@ def main():
if len(args) != 1: if len(args) != 1:
parser.print_help() parser.print_help()
sys.exit(1) sys.exit(1)
src = args[0] src = os.path.abspath(os.path.expanduser(args[0]))
if options.title == None: if options.title == None:
options.title = os.path.splitext(os.path.basename(src))[0] options.title = os.path.splitext(os.path.basename(src))[0]
try: try:
@ -78,6 +78,7 @@ def convert_txt(path, options):
buffer = '' buffer = ''
basename = os.path.basename(path) basename = os.path.basename(path)
oname = options.output oname = options.output
oname = os.path.abspath(os.path.expanduser(oname))
if not oname: if not oname:
oname = os.path.splitext(basename)[0]+'.lrf' oname = os.path.splitext(basename)[0]+'.lrf'
try: try:

7
upload
View File

@ -7,6 +7,13 @@ DOWNLOADS=$PREFIX/httpdocs/downloads
DOCS=$PREFIX/httpdocs/apidocs DOCS=$PREFIX/httpdocs/apidocs
exe=`cd dist && ls -1 libprs500-*.exe | tail -n1 && cd ..` exe=`cd dist && ls -1 libprs500-*.exe | tail -n1 && cd ..`
echo "<h2>The HTML</h2><pre>" > src/libprs500/lrf/html/demo/demo_ext.html
cat src/libprs500/lrf/html/demo/demo.html >> src/libprs500/lrf/html/demo/demo_ext.html
echo '</pre>' >> src/libprs500/lrf/html/demo/demo_ext.html
html2lrf --title='Demonstration of html2lrf' --author='Kovid Goyal' --header --output=/tmp/html2lrf.lrf src/libprs500/lrf/html/demo/demo.html
scp /tmp/html2lrf.lrf castalia:$DOWNLOADS/
ssh castalia rm -f $DOWNLOADS/libprs500\*.exe ssh castalia rm -f $DOWNLOADS/libprs500\*.exe
scp dist/$exe castalia:$DOWNLOADS/ scp dist/$exe castalia:$DOWNLOADS/
ssh castalia chmod a+r $DOWNLOADS/\* ssh castalia chmod a+r $DOWNLOADS/\*