diff --git a/.pydevproject b/.pydevproject
index a8ae13b3c5..ce1227533e 100644
--- a/.pydevproject
+++ b/.pydevproject
@@ -5,6 +5,5 @@
+ This file contains a demonstration of the capabilities of html2lrf, the HTML to LRF converter from libprs500. To obtain libprs500 visit https://libprs500.kovidgoyal.net
+ element
self.book = book #: The Book object representing a BBeB book
path = os.path.abspath(path)
os.chdir(os.path.dirname(path))
@@ -301,17 +321,18 @@ class HTMLConverter(object):
def merge_parent_css(prop, pcss):
temp = {}
for key in pcss.keys():
- if key.lower().startswith('font'):
+ chk = key.lower()
+ if chk.startswith('font') or chk == 'text-align':
temp[key] = pcss[key]
prop.update(temp)
- prop = dict()
+ prop = dict()
if tag.has_key("align"):
prop["text-align"] = tag["align"]
if self.css.has_key(tag.name):
prop.update(self.css[tag.name])
if tag.has_key("class"):
- cls = tag["class"].lower()
+ cls = tag["class"].lower()
for classname in ["."+cls, tag.name+"."+cls]:
if self.css.has_key(classname):
prop.update(self.css[classname])
@@ -330,11 +351,11 @@ class HTMLConverter(object):
self.top = self.current_block
self.process_children(self.soup, {})
- if self.current_para:
+ if self.current_para and get_text(self.current_para).strip():
self.current_block.append(self.current_para)
- if self.current_block:
+ if self.current_block and get_text(self.current_block).strip():
self.current_page.append(self.current_block)
- if self.current_page:
+ if self.current_page and get_text(self.current_page).strip():
self.book.append(self.current_page)
@@ -356,16 +377,17 @@ class HTMLConverter(object):
cwd = os.getcwd()
for link in self.links:
purl = urlparse(link.tag['href'])
- if purl[1]: # Not a local link
+ if purl[1]: # Not a link to a file on the local filesystem
continue
path, fragment = purl[2], purl[5]
para, tag = link.para, link.tag
if not path or os.path.basename(path) == self.file_name:
if fragment in self.targets.keys():
- tb = self.targets[fragment]
+ tb = self.targets[fragment]
jb = JumpButton(tb)
self.book.append(jb)
cb = CharButton(jb, text=self.get_text(tag))
+ para.contents = []
para.append(cb)
else:
if not os.access(path, os.R_OK):
@@ -373,12 +395,14 @@ class HTMLConverter(object):
print "Skipping", link
continue
path = os.path.abspath(path)
- if not path in HTMLConverter.processed_files.keys():
+ if not path in HTMLConverter.processed_files.keys():
try:
self.files[path] = HTMLConverter(self.book, path, \
font_delta=self.font_delta, verbose=self.verbose)
HTMLConverter.processed_files[path] = self.files[path]
- except:
+ except Exception, e:
+ print >>sys.stderr, 'Unable to process', path
+ traceback.print_exc()
continue
finally:
os.chdir(cwd)
@@ -389,10 +413,11 @@ class HTMLConverter(object):
tb = conv.targets[fragment]
else:
tb = conv.top
- jb = JumpButton(tb)
+ jb = JumpButton(tb)
self.book.append(jb)
cb = CharButton(jb, text=self.get_text(tag))
- para.append(cb)
+ para.contents = []
+ para.append(cb)
self.links_processed = True
@@ -411,13 +436,13 @@ class HTMLConverter(object):
End the current page, ensuring that any further content is displayed
on a new page.
"""
- if self.current_para.contents:
+ if get_text(self.current_para).strip():
self.current_block.append(self.current_para)
self.current_para = Paragraph()
- if self.current_block.contents:
+ if get_text(self.current_block).strip():
self.current_page.append(self.current_block)
self.current_block = TextBlock()
- if self.current_page.contents:
+ if get_text(self.current_page).strip():
self.book.append(self.current_page)
self.current_page = Page()
@@ -442,12 +467,37 @@ class HTMLConverter(object):
self.add_text(c, pcss)
def add_text(self, tag, css):
- try:
- self.current_para.append(Span(tag, self.sanctify_css(css), \
- font_delta=self.font_delta))
- except ConversionError, err:
- if self.verbose:
- print >>sys.stderr, err
+ '''
+ Add text to the current paragraph taking CSS into account.
+ @param tag: Either a BeautifulSoup tag or a string
+ @param css: CSS properties for the text; 'text-align' selects the TextBlock alignment and is popped
+ @type css: dict
+ '''
+ src = (tag.string if hasattr(tag, 'string') else str(tag)) or '' # .string may be None; treat as empty text
+ if not src.strip():
+ self.current_para.append(' ') # blank text is collapsed to a single space
+ else:
+ align = 'head' # alignment used when no text-align is given
+ if css.has_key('text-align'):
+ val = css['text-align']
+ if val in ["right", "foot"]:
+ align = "foot"
+ elif val == "center":
+ align = "center"
+ css.pop('text-align')
+ if align != self.current_block.textStyle.attrs['align']:
+ if get_text(self.current_para).strip():
+ self.current_block.append(self.current_para)
+ if get_text(self.current_block).strip():
+ self.current_page.append(self.current_block)
+ self.current_block = TextBlock(TextStyle(align=align)) # alignment is set per TextBlock, so start a new one
+ self.current_para = Paragraph()
+ try:
+ self.current_para.append(Span(src, self.sanctify_css(css), \
+ font_delta=self.font_delta))
+ except ConversionError, err:
+ if self.verbose:
+ print >>sys.stderr, err
def sanctify_css(self, css):
""" Make css safe for use in a Span element """
@@ -461,22 +511,21 @@ class HTMLConverter(object):
css.pop(key)
return css
+ def end_current_para(self):
+ '''
+ End the current paragraph and follow it with a paragraph break (CR). If the
+ current paragraph contains no non-whitespace text, do nothing.
+ '''
+ if not get_text(self.current_para).strip():
+ return
+ if self.current_para.contents:
+ self.current_block.append(self.current_para)
+ self.current_para = Paragraph() # subsequent text goes into a fresh paragraph
+ if self.current_block.contents and \
+ not isinstance(self.current_block.contents[-1], CR):
+ self.current_block.append(CR()) # skipped when the block already ends with a CR
def parse_tag(self, tag, parent_css):
-
- def process_text_tag(tag, tag_css):
- if 'page-break-before' in tag_css.keys():
- if tag_css['page-break-before'].lower() != 'avoid':
- self.end_page()
- tag_css.pop('page-break-before')
- end_page = False
- if 'page-break-after' in tag_css.keys():
- end_page = True
- tag_css.pop('page-break-after')
- self.process_children(tag, tag_css)
- if end_page:
- self.end_page()
-
try:
tagname = tag.name.lower()
except AttributeError:
@@ -488,17 +537,47 @@ class HTMLConverter(object):
return
except KeyError:
pass
+ if 'page-break-before' in tag_css.keys():
+ if tag_css['page-break-before'].lower() != 'avoid':
+ self.end_page()
+ tag_css.pop('page-break-before')
+ end_page = False
+ if 'page-break-after' in tag_css.keys() and \
+ tag_css['page-break-after'].lower() != 'avoid':
+ end_page = True
+ tag_css.pop('page-break-after')
+
if tagname in ["title", "script", "meta", 'del']:
pass
elif tagname == 'a':
if tag.has_key('name'):
- self.current_block.append(self.current_para)
- self.current_page.append(self.current_block)
+ if get_text(self.current_para).strip():
+ self.current_block.append(self.current_para)
+ if get_text(self.current_block).strip():
+ self.current_page.append(self.current_block)
+ previous = self.current_block
tb = TextBlock()
self.current_block = tb
self.current_para = Paragraph()
self.targets[tag['name']] = tb
self.process_children(tag, tag_css)
+ if tb.parent == None: # presumably: tb was not attached to any container while processing the children - confirm `parent` semantics
+ if self.current_block == tb: # the anchor's children did not replace the current block
+ if get_text(self.current_para): # NOTE(review): other call sites test .strip(); confirm whitespace-only paragraphs should be kept here
+ self.current_block.append(self.current_para)
+ self.current_para = Paragraph()
+ self.current_page.append(self.current_block)
+ self.current_block = TextBlock()
+ else:
+ found, marked = False, False
+ for item in self.current_page.contents: # NOTE(review): no break - every TextBlock from `previous` onward (including `previous`) overwrites the target, so the last one wins; confirm intent
+ if item == previous:
+ found = True
+ if found and isinstance(item, TextBlock):
+ self.targets[tag['name']] = item
+ marked = True
+ if not marked:
+ self.current_page.append(tb) # no TextBlock matched on the page: attach the original target block itself
elif tag.has_key('href'):
purl = urlparse(tag['href'])
path = purl[2]
@@ -506,19 +585,18 @@ class HTMLConverter(object):
['png', 'jpg', 'bmp', 'jpeg']:
self.add_image_page(path)
else:
- span = _Span()
- self.current_para.append(span)
- self.links.append(HTMLConverter.Link(span, tag))
+ self.add_text('Link: '+tag['href'], tag_css)
+ self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
elif tagname == 'img':
if tag.has_key('src') and os.access(tag['src'], os.R_OK):
- width, height = 600, 800
+ width, height = self.page_width, self.page_height
try:
try:
- from PIL import Image
+ from PIL import Image as PILImage
except:
pass
else:
- im = Image.open(tag['src'])
+ im = PILImage.open(tag['src'])
width, height = im.size
if tag.has_key('width'):
width = int(tag['width'])
@@ -526,16 +604,26 @@ class HTMLConverter(object):
height = int(tag['height'])
except:
pass
- self.current_block.append(self.current_para)
- self.current_page.append(self.current_block)
- self.current_para = Paragraph()
- self.current_block = TextBlock()
path = os.path.abspath(tag['src'])
if not self.images.has_key(path):
self.images[path] = ImageStream(path)
- im = ImageBlock(self.images[path], x1=width, y1=height,
- xsize=width, ysize=height)
- self.current_page.append(im)
+ if max(width, height) <= min(self.page_width, self.page_height)/5.: # at most 1/5 of the smaller page dimension: keep inline in the current paragraph
+ im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
+ xsize=width, ysize=height)
+ self.current_para.append(Plot(im, xsize=width*10, ysize=height*10)) # ysize must follow the image height, not its width
+ elif max(width, height) <= min(self.page_width, self.page_height)/2.: # at most 1/2: end the current paragraph first
+ self.end_current_para()
+ im = Image(self.images[path], x0=0, y0=0, x1=width, y1=height,\
+ xsize=width, ysize=height)
+ self.current_para.append(Plot(im, xsize=width*10, ysize=height*10)) # ysize must follow the image height, not its width
+ else: # larger images: flush pending text and add a separate ImageBlock to the page
+ self.current_block.append(self.current_para)
+ self.current_page.append(self.current_block)
+ self.current_para = Paragraph()
+ self.current_block = TextBlock()
+ im = ImageBlock(self.images[path], x1=width, y1=height,
+ xsize=width, ysize=height)
+ self.current_page.append(im)
else:
print >>sys.stderr, "Failed to process", tag
@@ -557,30 +645,72 @@ class HTMLConverter(object):
f.close()
except IOError:
pass
- elif tagname in ['p', 'div', 'ul', 'ol', 'tr', 'li', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
+ elif tagname == 'pre':
+ self.end_current_para()
+ src = ''.join([str(i) for i in tag.contents])
+ lines = src.split('\n')
+ for line in lines:
+ try:
+ self.current_para.append(Span(line, tag_css))
+ except ConversionError:
+ pass
+ self.current_para.CR()
+ elif tagname in ['ul', 'ol']:
+ self.in_ol = 1 if tagname == 'ol' else 0
+ self.end_current_para()
+ self.process_children(tag, tag_css)
+ self.in_ol = 0
+ self.end_current_para()
+ elif tagname == 'li':
+ prepend = str(self.in_ol)+'. ' if self.in_ol else u'\u2022' + ' '
+ if get_text(self.current_para).strip():
+ self.current_para.append(CR())
+ self.current_block.append(self.current_para)
+ self.current_para = Paragraph()
+ self.current_para.append(Space(xsize=100))
+ self.current_para.append(prepend)
+ self.process_children(tag, tag_css)
+ if self.in_ol:
+ self.in_ol += 1
+ elif tagname in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6']:
+ self.end_current_para()
+ if self.current_block.contents:
+ self.current_block.append(CR())
+ self.process_children(tag, tag_css)
+ self.end_current_para()
+ self.current_block.append(CR())
+ elif tagname in ['p', 'div']:
# TODO: Implement ol
- indent = tag_css.pop('text-indent', '')
- if indent:
+ #indent = tag_css.pop('text-indent', '')
+ #if indent:
# TODO: If indent is different from current textblock's parindent
# start a new TextBlock
- pass
- self.current_para.CR() # Put a paragraph end
- self.current_block.append(self.current_para)
- self.current_para = Paragraph()
- process_text_tag(tag, tag_css)
+ #pass
+ self.end_current_para()
+ self.process_children(tag, tag_css)
+ self.end_current_para()
elif tagname in ['b', 'strong', 'i', 'em', 'span']:
- process_text_tag(tag, tag_css)
+ self.process_children(tag, tag_css)
elif tagname == 'font':
if tag.has_key('face'):
tag_css['font-family'] = tag['face']
- process_text_tag(tag, tag_css)
- elif tagname == 'br':
+ self.process_children(tag, tag_css)
+ elif tagname in ['br', 'tr']:
self.current_para.append(CR())
+ self.process_children(tag, tag_css)
elif tagname == 'hr':
- self.current_para.append(CR())
- # TODO: Horizontal line?
+ if self.current_para.contents:
+ self.current_block.append(self.current_para)
+ self.current_para = Paragraph()
+ self.current_block.append(CR())
+ self.current_page.append(self.current_block)
+ self.current_block = TextBlock()
+ self.current_page.RuledLine(linelength=self.page_width)
else:
- self.process_children(tag, tag_css)
+ self.process_children(tag, tag_css)
+
+ if end_page:
+ self.end_page()
def writeto(self, path, lrs=False):
self.book.renderLrs(path) if lrs else self.book.renderLrf(path)
@@ -632,6 +762,7 @@ def process_file(path, options):
suffix = '.lrs' if options.lrs else '.lrf'
name = os.path.splitext(os.path.basename(path))[0] + suffix
oname = os.path.join(cwd,name)
+ oname = os.path.abspath(os.path.expanduser(oname))
conv.writeto(oname, lrs=options.lrs)
print 'Output written to', oname
finally:
@@ -692,7 +823,7 @@ def console_query(dirpath, candidate, docs):
def get_path(path, query=console_query):
- path = os.path.abspath(path)
+ path = os.path.abspath(os.path.expanduser(path))
ext = os.path.splitext(path)[1][1:].lower()
if ext in ['htm', 'html', 'xhtml']:
return None, path
diff --git a/src/libprs500/lrf/html/demo/demo.html b/src/libprs500/lrf/html/demo/demo.html
new file mode 100644
index 0000000000..8cb52d943b
--- /dev/null
+++ b/src/libprs500/lrf/html/demo/demo.html
@@ -0,0 +1,73 @@
+
+
Demo of html2lrf
+ Table of Contents
+
+
+
+ Lists
+ Unordered lists
+
+
+
+ Note that nested lists are not supported. +
++
+ A simple paragraph of formatted + text with a ruled line following it. +
+A + similar + paragraph, but now using + CSS + to perform the text formatting.
++
+ Here I demonstrate the use of inline images in the midst of text. Here is a small image embedded in a sentence. Now we have a slightly larger image that is automatically put in its own block
and finally we have a large image which is automatically placed on a page by itself and prevented from being autoscaled when the user changes from S to M to L. Try changing sizes and see how the different embedding styles behave.
+
+
+
" > src/libprs500/lrf/html/demo/demo_ext.html +cat src/libprs500/lrf/html/demo/demo.html >> src/libprs500/lrf/html/demo/demo_ext.html +echo '' >> src/libprs500/lrf/html/demo/demo_ext.html +html2lrf --title='Demonstration of html2lrf' --author='Kovid Goyal' --header --output=/tmp/html2lrf.lrf src/libprs500/lrf/html/demo/demo.html +scp /tmp/html2lrf.lrf castalia:$DOWNLOADS/ + ssh castalia rm -f $DOWNLOADS/libprs500\*.exe scp dist/$exe castalia:$DOWNLOADS/ ssh castalia chmod a+r $DOWNLOADS/\*