Added support for conversion of HTML tables.

Added support for common encodings to txt2lrf.
2025-07-09 03:04:10 -04:00 · 2007-05-21 00:40:06 +00:00 · 2007-05-21 00:40:06 +00:00 · 806aba6f80
commit 806aba6f80
parent b26adb541e
9 changed files with 492 additions and 39 deletions
--- a/src/libprs500/init.py
+++ b/src/libprs500/init.py
@ -33,7 +33,7 @@ You may have to adjust the GROUP and the location of the rules file to
 suit your distribution.
 """

-__version__   = "0.3.32"
+__version__   = "0.3.33"
 __docformat__ = "epytext"
 __author__    = "Kovid Goyal <kovid@kovidgoyal.net>"

--- a/src/libprs500/devices/prs500/prstypes.py
+++ b/src/libprs500/devices/prs500/prstypes.py
@ -352,7 +352,7 @@ class SetTime(Command):
        self.day = t[2]
        self.hour = t[3]
        self.minute = t[4]
-        # Hack you should actually update the entire time tree is 
+        # Hack you should actually update the entire time tree if 
        # second is > 59
        self.second = t[5] if t[5] < 60 else 59 

--- a/src/libprs500/ebooks/lrf/fonts/init.py
+++ b/src/libprs500/ebooks/lrf/fonts/init.py
@ -13,7 +13,11 @@
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 import pkg_resources
-from PIL import ImageFont
+try:
+    from PIL import ImageFont
+except ImportError:
+    import ImageFont
+    
 '''
 Default fonts used in the PRS500
 '''
@ -26,7 +30,8 @@ FONT_MAP = {
 def get_font(name, size, encoding='unic'):
    '''
    Get an ImageFont object by name. 
-    @param size: Size in pts
+    @param size: Font height in pixels. To convert from pts:
+                 sz in pixels = (dpi/72) * size in pts
    @param encoding: Font encoding to use. E.g. 'unic', 'symbol', 'ADOB', 'ADBE', 'aprm'
    '''
    if name in FONT_MAP.keys():
--- a/src/libprs500/ebooks/lrf/html/convert_from.py
+++ b/src/libprs500/ebooks/lrf/html/convert_from.py
@ -39,6 +39,7 @@ from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, CR, Italic, ImageStream,
                Plot, Image, BlockSpace, RuledLine, BookSetting, Canvas
 from libprs500.ebooks.lrf.pylrs.pylrs import Span as _Span
 from libprs500.ebooks.lrf import ConversionError, option_parser, Book, PRS500_PROFILE
+from libprs500.ebooks.lrf.html.table import Table 
 from libprs500 import extract, filename_to_utf8
 from libprs500.ptempfile import PersistentTemporaryFile

@ -303,6 +304,7 @@ class HTMLConverter(object):
        self.chapter_regex = chapter_regex #: Regex used to search for chapter titles
        self.link_exclude = link_exclude #: Ignore matching hrefs
        self.scaled_images = {}   #: Temporary files with scaled version of images        
+        self.rotated_images = {}  #: Temporary files with rotated version of images        
        self.max_link_levels = max_link_levels #: Number of link levels to process recursively
        self.link_level  = link_level  #: Current link level
        self.blockquote_style = book.create_block_style(sidemargin=60, 
@ -317,6 +319,9 @@ class HTMLConverter(object):
        self.files   = {}         #: links that point to other files
        self.links_processed = False #: Whether links_processed has been called on this object
        self.font_delta = font_delta
+        # Set by table processing code so that any <a name> within the table 
+        # point to the previous element
+        self.anchor_to_previous = None 
        self.cover = cover
        self.memory = []          #: Used to ensure that duplicate CSS unhandled erros are not reported
        self.in_ol = False #: Flag indicating we're in an <ol> element
@ -478,6 +483,15 @@ class HTMLConverter(object):
            return text
    
    def process_links(self):
+        def add_toc_entry(text, target):
+            '''
+            Add an entry labelled C{text} that points to C{target} to the TOC 
+            of the book. Targets whose parent is missing or has no C{objId} 
+            are skipped (a message is printed in verbose mode).
+            @param text:   Label of the TOC entry
+            @param target: Block the TOC entry should point to
+            '''
+            # TextBlocks in Canvases have a None parent or an Objects Parent
+            if target.parent is not None and \
+               hasattr(target.parent, 'objId'): 
+                # Use the arguments, not same-named variables of the caller
+                self.book.addTocEntry(text, target)
+            elif self.verbose:
+                print "Cannot add link", text, "to TOC"
+                
+        
        def get_target_block(fragment, targets):
            '''Return the correct block for the <a name> element'''
            bs = targets[fragment]
@ -535,7 +549,7 @@ class HTMLConverter(object):
                if fragment in self.targets.keys():
                    tb = get_target_block(fragment, self.targets)
                    if self.is_root:
-                        self.book.addTocEntry(ascii_text, tb)                 
+                        add_toc_entry(ascii_text, tb)                        
                    sys.stdout.flush()
                    jb = JumpButton(tb)
                    self.book.append(jb)
@ -580,7 +594,7 @@ class HTMLConverter(object):
                else:
                    tb = conv.top
                if self.is_root:
-                    self.book.addTocEntry(ascii_text, tb)      
+                    add_toc_entry(ascii_text, tb)  
                jb = JumpButton(tb)                
                self.book.append(jb)
                cb = CharButton(jb, text=text)
@ -727,22 +741,32 @@ class HTMLConverter(object):
                                                         blockStyle=self.current_block.blockStyle)
    
    def process_image(self, path, tag_css, width=None, height=None):
+        if self.rotated_images.has_key(path):
+            path = self.rotated_images[path].name
+        if self.scaled_images.has_key(path):
+            path = self.scaled_images[path].name            
+        
+        im = PILImage.open(path)
+        
+        if width == None or height == None:            
+            width, height = im.size
+        
        def scale_image(width, height):
            pt = PersistentTemporaryFile(suffix='.jpeg')
            im.resize((int(width), int(height)), PILImage.ANTIALIAS).convert('RGB').save(pt, 'JPEG')
            pt.close()
            self.scaled_images[path] = pt
            return pt.name
-                
-        if self.scaled_images.has_key(path):
-            path = self.scaled_images[path].name
-            
-        im = PILImage.open(path)
-        if width == None or height == None:            
-            width, height = im.size
-        if width > height:
+        
+        if width > self.profile.page_width and width > height:
+            pt = PersistentTemporaryFile(suffix='.jpeg')
            im = im.rotate(-90)
+            im.convert('RGB').save(pt, 'JPEG')
+            path = pt.name
+            pt.close()            
+            self.rotated_images[path] = pt
            width, height = im.size
+            
        if height > self.profile.page_height:
            corrf = self.profile.page_height/(1.*height)
            width, height = floor(corrf*width), self.profile.page_height-1                        
@ -788,7 +812,7 @@ class HTMLConverter(object):
            self.end_page()
            self.current_page.append(Canvas(width=self.profile.page_width,
                                            height=height))
-            left = int(floor((self.profile.page_width - width)/2.))            
+            left = int(floor((self.profile.page_width - width)/2.))
            self.current_page.contents[0].put_object(ImageBlock(self.images[path]),
                                                  left, 0)
    
@ -824,6 +848,18 @@ class HTMLConverter(object):
            pass
        elif tagname == 'a' and self.max_link_levels >= 0:
            if tag.has_key('name'):
+                if self.anchor_to_previous:
+                    self.process_children(tag, tag_css)
+                    return
+                    for c in self.anchor_to_previous.contents:
+                        if isinstance(c, (TextBlock, ImageBlock)):
+                            self.targets[tag['name']] = c
+                            return
+                    tb = self.book.create_text_block()
+                    tb.Paragraph(" ")
+                    self.anchor_to_previous.append(tb)
+                    self.targets[tag['name']] = tb                    
+                    return
                previous = self.current_block
                self.process_children(tag, tag_css)
                target = None
@ -867,7 +903,7 @@ class HTMLConverter(object):
                    ['png', 'jpg', 'bmp', 'jpeg']:
                    self.process_image(path, tag_css)
                else:
-                    self.add_text('Link: ' + tag['href'], tag_css)
+                    self.add_text(self.get_text(tag), tag_css)
                    self.links.append(HTMLConverter.Link(self.current_para.contents[-1], tag))
        elif tagname == 'img':
            if tag.has_key('src') and os.access(unquote(tag['src']), os.R_OK):
@ -1010,30 +1046,45 @@ class HTMLConverter(object):
            if tag.has_key('face'):
                tag_css['font-family'] = tag['face']
            self.process_children(tag, tag_css)
-        elif tagname in ['br', 'tr']:
+        elif tagname in ['br']:
            self.current_para.append(CR())
-            self.process_children(tag, tag_css)
-        elif tagname in ['td']:
-            self.current_para.append(' ')
-            self.process_children(tag, tag_css)
        elif tagname == 'hr':
            self.end_current_para()            
            self.current_block.append(CR())
            self.end_current_block()
            self.current_page.RuledLine(linelength=self.profile.page_width)
+        elif tagname == 'table':
+            tag_css = self.tag_css(tag) # Table should not inherit CSS
+            self.process_table(tag, tag_css)
        else:            
-            self.process_children(tag, tag_css)
-        
+            self.process_children(tag, tag_css)        
        if end_page:
                self.end_page()
                    
+    def process_table(self, tag, tag_css):
+        '''
+        Convert a C{<table>} into a sequence of L{Canvas}es, one per table 
+        row, and append them to the current page.
+        @param tag:     The C{<table>} tag
+        @param tag_css: CSS that applies to the table
+        '''
+        self.end_current_block()
+        rowpad = colpad = 10
+        table = Table(self, tag, tag_css, rowpad=rowpad, colpad=colpad)
+        canvases = []
+        for block, xpos, ypos, delta in table.blocks(self.profile.page_width):
+            if not block:
+                # Start of a new row, ypos is the height of the row
+                canvases.append(Canvas(self.profile.page_width, ypos+rowpad,
+                        blockrule='block-fixed'))
+            else:
+                # delta is the unused horizontal space, half of it is used as
+                # left margin. ypos is the offset of the block inside its 
+                # cell, without it the blocks of a cell are drawn on top of 
+                # each other.
+                canvases[-1].put_object(block, xpos + int(delta/2.), ypos)
+            
+        for canvas in canvases:
+            self.current_page.append(canvas)
+        self.end_current_block()
+        
+    
    def writeto(self, path, lrs=False):
        self.book.renderLrs(path) if lrs else self.book.renderLrf(path)
        
    def cleanup(self):
-        for _file in self.scaled_images.values():   
+        for _file in self.scaled_images.values() + self.rotated_images.values():   
            _file.__del__()
-        
+
 def process_file(path, options):
    cwd = os.getcwd()
    dirpath = None
@ -1070,7 +1121,7 @@ def process_file(path, options):
                tim.save(tf.name)
                tpath = tf.name
            else:
-                raise ConversionError, 'Cannot read from: %s', (options.cover,)
+                raise ConversionError, 'Cannot read from: %s'% (options.cover,)
        
                    
        if not options.title:
--- a/src/libprs500/ebooks/lrf/html/demo/demo.html
+++ b/src/libprs500/ebooks/lrf/html/demo/demo.html
@ -2,20 +2,23 @@
 <head>
 <style type='text/css'>
 .toc { page-break-after: always; text-indent: 0em; }
+.tocpn            {text-align: right; }
+.tocchr           {text-align: right; font-variant: small-caps;}
 </style>
 </head>
  <h1>Demo of <span style='font-family:monospace'>html2lrf</span></h1>  
  <p>
-  This file contains a demonstration of the capabilities of   <span style='font-family:monospace'>html2lrf,</span>   the HTML to LRF converter   from <em>libprs500.</em> To obtain libprs500 visit  <span style='font:sans-serif'>https://libprs500.kovidgoyal.net</span>
+  This file contains a demonstration of the capabilities of   <span style='font-family:monospace'>html2lrf,</span>   the HTML to LRF converter   from <em>libprs500.</em> To obtain libprs500 visit<br/><span style='font:sans-serif'>https://libprs500.kovidgoyal.net</span>
  </p>
  <br/>
  <h2><a name='toc'>Table of Contents</a></h2>
  <ul style='page-break-after:always'>
-    <li><a href='#lists'>Demonstration of Lists</a></li>
+    <li><a href='#lists'>Lists</a></li>
+    <li><a href='#tables'>Tables</a></li>
    <li><a href='#text'>Text formatting and ruled lines</a></li>
    <li><a href='#images'>Inline images</a></li>
    <li><a href='#recursive'>Recursive link following</a></li>
-    <li><a href='demo_ext.html'>The HTML used to create this file</a>
+    <!--<li><a href='demo_ext.html'>The HTML used to create this file</a>-->
 </ul>

 <h2><a name='lists'>Lists</a></h2>
@ -40,6 +43,53 @@
 <a href='#toc'>Table of Contents</a>
 </p>

+ <h2><a name='tables'>Tables</a></h2>
+ <p>
+ Because I can!
+ </p>
+ <br/>
+
+ <table>
+ <tr><td colspan=4><h3 style="text-align:center">A matrix</h3></td></tr>
+ <tr><td></td><td style="text-align:center"><b>Column 1</b></td><td style="text-align:center"><b>Column 2</b></td><td style="text-align:center"><b>Column 3</b></td></tr>
+ <tr><td><b>Row 1</b></td><td><p style="text-align:center">(1, 1)</p></td></tr>
+ <tr><td><b>Row 2</b></td><td></td><td style="text-align:center"><p>(2, 2)</p></td><td></td></tr>
+ <tr><td><b>Row 3</b></td><td></td><td></td><td><p style="text-align:center">(3, 3)</p></td></tr>
+ </table>
+ <br/>
+ <p>
+ html2lrf supports both rowspan and colspan, but no other HTML table attributes, as it uses its own algorithm to determine optimal placement of cells. 
+ </p>
+ <br/>
+ <p>
+ The table conversion code is very new and likely to be swarming with bugs, so please report them at <br/><font face="monospace">https://libprs500.kovidgoyal.net/newticket</font>
+ </p>
+ <br/>
+ <p style="page-break-after:always">
+ On the next page you'll see a real life example taken from a Project Gutenberg text with no modifications. It shows off html2lrf's handling of rowspan and colspan.
+ </p>
+ <h3 align="center">Sample Complex Table of Contents</h3>
+ <table summary="TOC">
+ <tr><td colspan="3">&nbsp;</td><td align="right">PAGE</td></tr>
+ <tr><td class="tocch" colspan="3"><a href="#PREFACE">Preface</a></td><td class="tocpn">v</td></tr>
+ <tr><td class="tocch" colspan="3"><a href="#REFERENCE_WORKS">List of Works of Reference</a></td><td class="tocpn">vii</td></tr>
+ <tr><td class="tocch" colspan="3"><a href="#LIST_OF_ILLUSTRATIONS">List of Illustrations</a></td><td class="tocpn">xi</td></tr>
+ <tr><td class="tocch">Chapter</td><td class="tocchr">I.</td><td class="tocch"><a href="#CHAPTER_I">History of the Foundation</a></td><td class="tocpn">3</td></tr>
+ <tr><td class="tocchr" colspan="2">II.</td><td class="tocch"><a href="#CHAPTER_II">Exterior of the Church</a></td><td class="tocpn">25</td></tr>
+ <tr><td class="tocchr" colspan="2">III.</td><td class="tocch"><a href="#CHAPTER_III">Interior of the Church</a></td><td class="tocpn">33</td></tr>
+ <tr><td class="tocchr" colspan="2">IV.</td><td class="tocch"><a href="#CHAPTER_IV">St. Bartholomew-the-Less and the Hospital</a></td><td class="tocpn">63</td></tr>
+ <tr><td class="tocch">Appendix</td><td class="tocchr">I.</td><td class="tocch"><a href="#APPENDIX_I">The Priory Seals</a></td><td class="tocpn">73</td></tr>
+ <tr><td class="tocchr" colspan="2">II.</td><td class="tocch"><a href="#APPENDIX_II">The Priors and Rectors</a></td><td class="tocpn">77</td></tr>
+ <tr><td class="tocchr" colspan="2">III.</td><td class="tocch"><a href="#APPENDIX_III">Inventory of Vestments, etc.</a></td><td class="tocpn">79</td></tr>
+ <tr><td class="tocchr" colspan="2">IV.</td><td class="tocch"><a href="#APPENDIX_IV">The Organ</a></td><td class="tocpn">80</td></tr>
+ <tr><td class="tocch" colspan="3"><a href="#INDEX">Index</a></td><td class="tocpn">83</td></tr>
+ </table>
+ 
+ <p class='toc'>
+ <hr />
+ <a href='#toc'>Table of Contents</a>
+ </p>
+
 <h2><a name='text'>Text formatting</a></h2>
 <p>
 A simple <i>paragraph</i> of <b>formatted 
--- a/src/libprs500/ebooks/lrf/html/table.py
+++ b/src/libprs500/ebooks/lrf/html/table.py
@ -0,0 +1,306 @@
+##    Copyright (C) 2006 Kovid Goyal kovid@kovidgoyal.net
+##    This program is free software; you can redistribute it and/or modify
+##    it under the terms of the GNU General Public License as published by
+##    the Free Software Foundation; either version 2 of the License, or
+##    (at your option) any later version.
+##
+##    This program is distributed in the hope that it will be useful,
+##    but WITHOUT ANY WARRANTY; without even the implied warranty of
+##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+##    GNU General Public License for more details.
+##
+##    You should have received a copy of the GNU General Public License along
+##    with this program; if not, write to the Free Software Foundation, Inc.,
+##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+import math, sys
+
+from libprs500.ebooks.lrf.fonts import get_font
+from libprs500.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
+                                             CharButton, Plot, Paragraph, \
+                                             LrsTextTag
+
+def ceil(num):
+    '''Return C{num} rounded up to the nearest integer, as an C{int}.'''
+    rounded_up = math.ceil(num)
+    return int(rounded_up)
+
+def print_xml(elem):
+    '''
+    Write the XML representation of C{elem} to stdout, followed by a newline.
+    C{elem} must provide a C{toElement(encoding)} method. 
+    '''
+    from libprs500.ebooks.lrf.pylrs.pylrs import ElementWriter    
+    elem = elem.toElement('utf8')
+    ew = ElementWriter(elem, sourceEncoding='utf8')
+    ew.write(sys.stdout)
+    print
+    
+def cattrs(base, extra):
+    '''
+    Return a copy of C{base} updated with the entries of C{extra}. Entries 
+    in C{extra} take precedence. C{base} itself is not modified.
+    '''
+    merged = base.copy()
+    merged.update(extra)
+    return merged
+    
+def tokens(tb):
+    '''
+    Generator that yields C{(token, attrs)} pairs for the contents of the 
+    text block C{tb}. A token is one of:
+      1. The integer 1, for a L{CR} that is a direct child of C{tb} 
+         (C{attrs} is None)
+      2. The integer 2, for a L{CR} inside a L{Paragraph} (C{attrs} is None)
+      3. A string of text, C{attrs} is a dictionary with the attributes of 
+         the enclosing L{Span}s
+      4. A L{Plot} object (C{attrs} is None)
+    Direct children of C{tb} that are neither a L{CR} nor a L{Paragraph} are
+    ignored.
+    '''        
+    def process_element(x, attrs):
+        # Yields the tokens for a single element inside a paragraph.
+        # Element types not listed here produce no tokens.
+        if isinstance(x, CR):
+            yield 2, None
+        elif isinstance(x, Text):
+            yield x.text, cattrs(attrs, {})
+        elif isinstance(x, basestring):
+            yield x, cattrs(attrs, {})
+        elif isinstance(x, (CharButton, LrsTextTag)):
+            # Only the first content item is used
+            if x.contents:
+                yield x.contents[0].text, cattrs(attrs, {})
+        elif isinstance(x, Plot):
+            yield x, None
+        elif isinstance(x, Span):
+            # Attributes of the span override the inherited ones
+            attrs = cattrs(attrs, x.attrs)
+            for y in x.contents:
+                for z in process_element(y, attrs):
+                    yield z
+        
+            
+    for i in tb.contents:
+        if isinstance(i, CR):
+            yield 1, None
+        elif isinstance(i, Paragraph):
+            for j in i.contents: 
+                # Start from the attributes of the element itself, if any
+                attrs = {}
+                if hasattr(j, 'attrs'):
+                    attrs = j.attrs
+                for k in process_element(j, attrs):                    
+                    yield k
+    
+
+class Cell(object):
+    '''
+    A single table cell. The contents of the cell tag are rendered by the 
+    converter onto a scratch page and the resulting L{TextBlock}s are 
+    collected in C{self.text_blocks} so that they can be measured and placed 
+    later.
+    '''
+    
+    def __init__(self, conv, cell, css):
+        '''
+        @param conv: Converter used to render the cell. Its C{current_page} 
+                     is temporarily replaced while the cell is parsed.
+        @param cell: The tag of this cell. The C{colspan} and C{rowspan} 
+                     attributes are read from it, both default to 1.
+        @param css:  CSS passed on to C{conv.parse_tag}
+        '''
+        self.conv = conv
+        self.cell = cell
+        self.css  = css
+        self.text_blocks = []
+        self.rowspan = self.colspan = 1
+        try:
+            self.colspan = int(cell['colspan']) if cell.has_key('colspan') else 1
+            self.rowspan = int(cell['rowspan']) if cell.has_key('rowspan') else 1
+        except (TypeError, ValueError):
+            # Span is not a number, keep the values set so far
+            if conv.verbose:
+                print >>sys.stderr, "Error reading row/colspan for ", cell
+                
+        pp = conv.current_page
+        conv.book.allow_new_page = False
+        conv.anchor_to_previous = pp
+        conv.current_page = conv.book.create_page()
+        try:
+            conv.parse_tag(cell, css)
+            conv.end_current_block()
+            for item in conv.current_page.contents:
+                if isinstance(item, TextBlock):
+                    self.text_blocks.append(item)
+        finally:
+            # Restore the converter even if parsing the cell failed
+            conv.current_page = pp
+            conv.book.allow_new_page = True
+            conv.anchor_to_previous = None
+        if not self.text_blocks:
+            # Empty cell, use a placeholder so that the cell has a size
+            tb = conv.book.create_text_block()
+            tb.Paragraph(' ')
+            self.text_blocks.append(tb)
+        for tb in self.text_blocks:
+            tb.parent = None
+            tb.objId  = 0
+            # Needed as we have to eventually change this BlockStyle's width and 
+            # height attributes. This blockstyle may be shared with other
+            # elements, so doing that causes havoc.
+            tb.blockStyle = conv.book.create_block_style()
+            ts = conv.book.create_text_style(**tb.textStyle.attrs)
+            ts.attrs['parindent'] = 0
+            tb.textStyle = ts
+            if ts.attrs['align'] == 'foot':
+                if tb.contents and isinstance(tb.contents[-1], Paragraph):
+                    tb.contents[-1].append(' ')
+        
+    def pts_to_pixels(self, pts):
+        '''
+        Convert C{pts} to pixels using the dpi of the converter profile. 
+        C{pts} is divided by 10 before conversion, i.e. it is presumably in 
+        tenths of a point. The result is rounded up.
+        '''
+        pts = int(pts)
+        return ceil((float(self.conv.profile.dpi)/72)*(pts/10.))
+    
+    def text_block_size(self, tb, maxwidth=sys.maxint, debug=False):
+        '''
+        Estimate the size of the text block C{tb} by laying out its words 
+        using the font metrics of the fonts it uses.
+        @param tb:       The text block to measure
+        @param maxwidth: Width in pixels at which lines are wrapped
+        @param debug:    Unused
+        @return: The tuple C{(width, height)} in pixels
+        '''
+        ts = tb.textStyle.attrs
+        default_font = get_font(ts['fontfacename'], self.pts_to_pixels(ts['fontsize']))
+        parindent = self.pts_to_pixels(ts['parindent'])
+        ls, ws = self.pts_to_pixels(ts['linespace']), self.pts_to_pixels(ts['wordspace'])
+        top, bottom, left, right = 0, 0, parindent, parindent
+        
+        def add_word(width, height, left, right, top, bottom):            
+            # Place a word of the given size at the current position, moving 
+            # to a new line if it does not fit. Returns the updated extents.
+            if left + width > maxwidth:
+                left = width + ws
+                top += height + ls
+                bottom = top+height if top+height > bottom else bottom
+            else:
+                left += (width + ws)
+                right = left if left > right else right                    
+                bottom = top+height if top+height > bottom else bottom
+            return left, right, top, bottom
+        
+        for token, attrs in tokens(tb):
+            font = default_font
+            if isinstance(token, int): # Handle para and line breaks
+                top = bottom
+                # Token 1 is a break between paragraphs, only that one is 
+                # followed by a paragraph indent
+                left = parindent if token == 1 else 0
+                continue
+            if isinstance(token, Plot):
+                width, height = self.pts_to_pixels(token.xsize), self.pts_to_pixels(token.ysize)
+                left, right, top, bottom = add_word(width, height, left, right, top, bottom)
+                continue
+            ff = attrs.get('fontfacename', ts['fontfacename'])
+            fs = attrs.get('fontsize', ts['fontsize'])
+            if (ff, fs) != (ts['fontfacename'], ts['fontsize']):
+                font = get_font(ff, self.pts_to_pixels(fs))
+            for word in token.split():
+                width, height = font.getsize(word)
+                left, right, top, bottom = add_word(width, height, left, right, top, bottom)
+        return right+3, bottom
+                
+    def text_block_preferred_width(self, tb, debug=False):
+        '''Width in pixels of C{tb} when its lines are never wrapped.'''
+        return self.text_block_size(tb, sys.maxint, debug=debug)[0]
+    
+    def preferred_width(self, debug=False):
+        '''Largest preferred width of the text blocks in this cell.'''
+        return ceil(max([self.text_block_preferred_width(i, debug=debug) for i in self.text_blocks]))
+    
+    def height(self, width):
+        '''Sum of the heights of the text blocks when wrapped at C{width}.'''
+        return sum([self.text_block_size(i, width)[1] for i in self.text_blocks])
+        
+            
+
+class Row(object):
+    '''A table row, i.e. the list of L{Cell}s created from its C{<td>} tags.'''
+    def __init__(self, conv, row, css, colpad):
+        '''
+        @param conv:   Converter used to render the cells
+        @param row:    The C{<tr>} tag
+        @param css:    CSS of the row, used as base for the CSS of the cells
+        @param colpad: Padding between columns
+        '''
+        self.cells = []
+        self.colpad = colpad
+        cells = row.findAll('td')
+        for cell in cells:
+            ccss = conv.tag_css(cell, css)
+            self.cells.append(Cell(conv, cell, ccss))        
+            
+    def number_of_cells(self):
+        '''Number of cells in this row. Respects colspan'''
+        ans = 0
+        for cell in self.cells:
+            ans += cell.colspan
+        return ans
+    
+    def height(self, widths):
+        '''
+        Height of the tallest cell in this row, 0 if the row has no cells.
+        @param widths: List of column widths. A cell gets the combined width 
+                       of all the columns it spans.
+        '''
+        i, heights = 0, []
+        for cell in self.cells:
+            width = sum(widths[i:i+cell.colspan])
+            heights.append(cell.height(width))
+            i += cell.colspan
+        if not heights:
+            return 0
+        return max(heights)
+    
+    def preferred_width(self, col):
+        '''
+        Preferred width of the cell that occupies column C{col}. Returns 0 
+        if that cell spans more than one column or if this row has no cell 
+        in column C{col}.
+        '''
+        start = 0
+        for cell in self.cells:
+            if start <= col < start + cell.colspan:
+                return 0 if cell.colspan > 1 else cell.preferred_width()
+            start += cell.colspan
+        # Row is shorter than the table, it puts no constraint on this column
+        return 0
+    
+    def cell_iterator(self):
+        '''Iterate over the cells of this row, in order.'''
+        for c in self.cells:
+            yield c
+        
+    
+class Table(object):
+    '''
+    A table, i.e. the list of L{Row}s created from the C{<tr>} tags of a 
+    table tag. Computes the column widths and row heights and yields the 
+    positioned text blocks of all cells via L{blocks}.
+    '''
+    def __init__(self, conv, table, css, rowpad=10, colpad=10):
+        '''
+        @param conv:   Converter used to render the cells
+        @param table:  The table tag
+        @param css:    CSS of the table, used as base for the CSS of the rows
+        @param rowpad: Padding between rows
+        @param colpad: Padding between columns
+        '''
+        self.rows = []
+        self.conv = conv
+        self.rowpad = rowpad
+        self.colpad = colpad
+        rows = table.findAll('tr')
+        for row in rows:            
+            rcss = conv.tag_css(row, css)
+            self.rows.append(Row(conv, row, rcss, colpad))
+            
+    def number_of_columns(self):
+        '''Largest number of cells (respecting colspan) found in any row.'''
+        max = 0
+        for row in self.rows:
+            max = row.number_of_cells() if row.number_of_cells() > max else max
+        return max
+    
+    def number_or_rows(self):
+        '''Number of rows in this table.'''
+        return len(self.rows)
+            
+    def height(self, maxwidth):
+        ''' Return row heights + self.rowpad'''
+        widths = self.get_widths(maxwidth)
+        # No padding after the last row
+        return sum([row.height(widths) + self.rowpad for row in self.rows]) - self.rowpad
+    
+    def get_widths(self, maxwidth):
+        '''
+        Return widths of columns + self.colpad
+        '''
+        rows, cols = self.number_or_rows(), self.number_of_columns()
+        widths = range(cols)
+        # The width of a column is the largest preferred width of its cells
+        for c in range(cols):
+            cellwidths = [ 0 for i in range(rows)]
+            for r in range(rows):
+                try:
+                    cellwidths[r] = self.rows[r].preferred_width(c)
+                except IndexError:
+                    continue                 
+            widths[c] = max(cellwidths)
+        # Shrink all columns by 5% at a time until the table fits into 
+        # maxwidth, giving up after 100 iterations
+        itercount = 0
+        while sum(widths) > maxwidth-((len(widths)-1)*self.colpad) and itercount < 100:
+            widths = [ceil((95./100.)*w) for w in widths]
+            itercount += 1
+        return [i+self.colpad for i in widths]
+    
+    def blocks(self, maxwidth):       
+        '''
+        Generator that yields the tuples C{(block, xpos, ypos, delta)}. 
+        For every row first C{(None, 0, row height, 0)} is yielded, followed 
+        by one tuple per text block of every cell in that row, where C{xpos} 
+        is the left edge of the column of the cell, C{ypos} is the vertical 
+        offset of the block inside the cell and C{delta} is the horizontal 
+        space left over when the table is narrower than C{maxwidth}. 
+        The block style of every yielded text block is replaced by one that 
+        has the final width and height of the block.
+        '''
+        rows, cols = self.number_or_rows(), self.number_of_columns()
+        # cellmatrix[r][c] is the cell that starts at row r, column c
+        cellmatrix = [[None for c in range(cols)] for r in range(rows)]        
+        # rowpos[r] is the column at which the next cell of row r is placed
+        rowpos = [0 for i in range(rows)]
+        for r in range(rows):
+            nc = self.rows[r].cell_iterator()
+            try:
+                while True:
+                    cell = nc.next()
+                    cellmatrix[r][rowpos[r]] = cell
+                    rowpos[r] += cell.colspan
+                    # NOTE(review): rows covered by a rowspan are shifted by 
+                    # one column, independent of colspan - confirm intended
+                    for k in range(1, cell.rowspan):
+                        try:
+                            rowpos[r+k] += 1
+                        except IndexError:
+                            break
+            except StopIteration: # No more cells in this row
+                continue
+            
+            
+        widths = self.get_widths(maxwidth)
+        heights = [row.height(widths) for row in self.rows]
+                
+        xpos = [sum(widths[:i]) for i in range(cols)]
+        delta = maxwidth - sum(widths)
+        if delta < 0: 
+            delta = 0
+        for r in range(len(cellmatrix)):
+            yield None, 0, heights[r], 0
+            for c in range(len(cellmatrix[r])):
+                cell = cellmatrix[r][c]
+                if not cell:
+                    continue
+                width = sum(widths[c:c+cell.colspan])
+                sypos = 0
+                for tb in cell.text_blocks:
+                    tb.blockStyle = self.conv.book.create_block_style(
+                                    blockwidth=width, 
+                                    blockheight=cell.text_block_size(tb, width)[1])
+                    
+                    yield tb, xpos[c], sypos, delta
+                    sypos += tb.blockStyle.attrs['blockheight']
+                
+            
+        
+                
--- a/src/libprs500/ebooks/lrf/meta.py
+++ b/src/libprs500/ebooks/lrf/meta.py
@ -94,7 +94,12 @@ class xml_attr_field(object):
        
    def __get__(self, obj, typ=None):
        """ Return the data in this field or '' if the field is empty """
-        document = dom.parseString(obj.info)
+        try:
+            document = dom.parseString(obj.info)
+        except Exception, err:
+            print >>sys.stderr, "Could not parse XML:", err
+            print obj.info
+            raise
        elems = document.getElementsByTagName(self.tag_name)
        if len(elems):
            elem = None
@ -108,7 +113,12 @@ class xml_attr_field(object):
    def __set__(self, obj, val):
        if val == None:
            val = ""
-        document = dom.parseString(obj.info)
+        try:
+            document = dom.parseString(obj.info)
+        except Exception, err:
+            print >>sys.stderr, "Could not parse XML:", err
+            print obj.info
+            raise
        elems = document.getElementsByTagName(self.tag_name)
        if len(elems):
            elem = None
@ -142,7 +152,13 @@ class xml_field(object):
        
    def __get__(self, obj, typ=None): 
        """ Return the data in this field or '' if the field is empty """
-        document = dom.parseString(obj.info)
+        try:
+            document = dom.parseString(obj.info)
+        except Exception, err:
+            print >>sys.stderr, "Could not parse XML:", err
+            print obj.info
+            raise
+            
        elems = document.getElementsByTagName(self.tag_name)
        if len(elems):
            elem = None
@ -158,7 +174,12 @@ class xml_field(object):
    def __set__(self, obj, val):
        if val == None:
            val = ""
-        document = dom.parseString(obj.info)
+        try:
+            document = dom.parseString(obj.info)
+        except Exception, err:
+            print >>sys.stderr, "Could not parse XML:", err
+            print obj.info
+            raise
        def create_elem():
            elem = document.createElement(self.tag_name)
            elem.appendChild(dom.Text())
--- a/src/libprs500/ebooks/lrf/pylrs/pylrs.py
+++ b/src/libprs500/ebooks/lrf/pylrs/pylrs.py
@ -56,6 +56,8 @@ DEFAULT_GENREADING      = "fs"          # default is yes to both lrf and lrs
 class LrsError(Exception):
    pass

+class ContentError(Exception):
+    '''Raised by C{Book.create_page_style} while C{allow_new_page} is False.'''
+    pass

 def _checkExists(filename):
    if not os.path.exists(filename):
@ -435,6 +437,8 @@ class Book(Delegator):
        self.applySetting("sourceencoding", DEFAULT_SOURCE_ENCODING)
        
        self.applySettings(settings, testValid=True)
+        
+        self.allow_new_page = True #: If False L{create_page} raises an exception

    def create_text_style(self, **settings):
        ans = TextStyle(**self.defaultTextStyle.attrs.copy())
@ -447,6 +451,8 @@ class Book(Delegator):
        return ans
        
    def create_page_style(self, **settings):
+        if not self.allow_new_page:
+            raise ContentError
        ans = PageStyle(**self.defaultPageStyle.attrs.copy())
        ans.update(settings)
        return ans
@ -641,12 +647,15 @@ class TableOfContents(object):
            raise LrsError, "TOC destination must be a TextBlock, ImageBlock or RuledLine"+\
                            " not a " + str(type(textBlock))

-        if textBlock.parent is None or not isinstance(textBlock.parent, Page):
+        if textBlock.parent is None:
            raise LrsError, "TOC text block must be already appended to a page"

        if textBlock.parent.parent is None:
            raise LrsError, \
                    "TOC destination page must be already appended to a book"
+                    
+        if not hasattr(textBlock.parent, 'objId'):
+            raise LrsError, "TOC destination must be appended to a container with an objID"

        self.tocEntries.append(TocLabel(tocLabel, textBlock))
        textBlock.tocLabel = tocLabel
@ -1373,7 +1382,6 @@ class TextBlock(LrsObject, LrsContainer):

        self.textSettings = {}
        self.blockSettings = {}
-
        
        for name, value in settings.items():
            if name in TextStyle.validSettings:
@ -1428,7 +1436,6 @@ class TextBlock(LrsObject, LrsContainer):
            tb.append(content.toElement(sourceEncoding))
            
        return tb
-
    
    def getReferencedObjIds(self):
        ids = [self.objId, self.extraId, self.blockStyle.objId,
@ -2111,7 +2118,7 @@ class PutObj(LrsContainer):
        self.y1 = int(y)


-    def appendReferencedObjects(self, parent):
+    def appendReferencedObjects(self, parent):        
        if self.content.parent is None:
            parent.append(self.content)

--- a/src/libprs500/ebooks/lrf/txt/convert_from.py
+++ b/src/libprs500/ebooks/lrf/txt/convert_from.py
@ -17,6 +17,7 @@ Convert .txt files to .lrf
 """
 import os, sys

+from libprs500.ebooks import BeautifulSoup
 from libprs500.ebooks.lrf import ConversionError, option_parser
 from libprs500.ebooks.lrf import Book
 from libprs500.ebooks.lrf.pylrs.pylrs import Paragraph, Italic, Bold, BookSetting
@ -63,7 +64,7 @@ def convert_txt(path, options):
                    C{author}, C{title}, C{encoding} (the assumed encoding of 
                    the text in C{path}.)
    """
-    import fileinput
+    import codecs
    header = None
    if options.header:
        header = Paragraph()
@ -84,7 +85,19 @@ def convert_txt(path, options):
    block = book.create_text_block()
    pg.append(block)
    book.append(pg)
-    for line in fileinput.input(path):
+    lines = ""
+    try:
+        lines = codecs.open(path, 'rb', 'ascii').readlines()
+        print 'huh'
+    except UnicodeDecodeError:
+            try:
+                lines = codecs.open(path, 'rb', 'cp1252').readlines()
+            except UnicodeDecodeError:
+                try:
+                    lines = codecs.open(path, 'rb', 'iso-8859-1').readlines()
+                except UnicodeDecodeError:
+                    lines = codecs.open(path, 'rb', 'utf8').readlines()
+    for line in lines:
        line = line.strip()
        if line:
            buffer = buffer.rstrip() + ' ' + line