Implement order of magnitude faster lrf2lrs

2025-07-09 03:04:10 -04:00 · 2007-09-17 03:13:44 +00:00 · 2007-09-17 03:13:44 +00:00 · 51458628ab
commit 51458628ab
parent bddd55a031
8 changed files with 835 additions and 1966 deletions
--- a/setup.py
+++ b/setup.py
@ -33,7 +33,7 @@ entry_points = {
                             'web2lrf  = libprs500.ebooks.lrf.web.convert_from:main',\
                             'pdf2lrf  = libprs500.ebooks.lrf.pdf.convert_from:main',\
                             'any2lrf  = libprs500.ebooks.lrf.any.convert_from:main',\
-                             'lrf2lrs  = libprs500.ebooks.lrf.lrs.convert_to:main',\
+                             'lrf2lrs  = libprs500.ebooks.lrf.parser:main',\
                             'libprs500-beta = libprs500.gui2.main:main',\
                           ], 
        'gui_scripts'    : [ APPNAME+' = libprs500.gui.main:main']
--- a/src/libprs500/ebooks/lrf/init.py
+++ b/src/libprs500/ebooks/lrf/init.py
@ -31,6 +31,9 @@ from libprs500 import iswindows

 __docformat__ = "epytext"

+class LRFParseError(Exception):
+    pass
+

 class PRS500_PROFILE(object):
    screen_width  = 600
--- a/src/libprs500/ebooks/lrf/lrs/convert_to.py
+++ b/src/libprs500/ebooks/lrf/lrs/convert_to.py
--- a/src/libprs500/ebooks/lrf/lrs/tags.py
+++ b/src/libprs500/ebooks/lrf/lrs/tags.py
@ -1,257 +0,0 @@
-import struct, StringIO
-
-tagparams = {}
-tagnames = {}
-
-class LRFTagException(Exception):
-    def __init__(self,msg):
-        self.msg=msg
-    def __str__(self):
-        return repr(self.msg)
-
-def getByte(f):
-    return struct.unpack("<B",f.read(1))[0];
-
-def getWord(f):
-    return struct.unpack("<H",f.read(2))[0];
-
-def getDWord(f):
-    return struct.unpack("<I",f.read(4))[0];
-
-def getQWord(f):
-    return struct.unpack("<Q",f.read(8))[0];
-
-def def_tag(val, params, name=None):
-    tagparams[val] = params
-    if (name): tagnames[val] = name
-
-class LRFTag:
-    def __init__(self,f):
-        if isinstance(f, basestring):
-            f = StringIO.StringIO(f)
-        self.fileOffset = f.tell()
-        tagId = struct.unpack("<BB",f.read(2))
-        if tagId[1]!=0xF5: raise LRFTagException("Bad tag ID")
-        if tagId[0] not in tagparams: raise LRFTagException("Unknown tag ID: F5%02X" % tagId[0])
-        params = tagparams[tagId[0]]
-        if type(params) is int:
-            self.params = f.read(params)
-        else:
-            self.params = params(f)
-        if tagId[0] in tagnames: self.name = tagnames[tagId[0]]
-        #else:
-        #    raise LRFException("No param parser for tag ID: F5%02X" % tagId[0])
-        self.tagId = 0xF500 + tagId[0]
-    def __str__(self):
-        s = "Tag %04X" % self.tagId
-        if hasattr(self,"name"): s+=" (%s)" % self.name
-        s += " at %08X, params: " % (self.fileOffset) + repr(self.params)
-        return s
-    def paramDWord(self):
-        if len(self.params)!=4:
-            raise LRFTagException("Bad parameter for tag ID: %04X" % self.tagId)
-        return struct.unpack("<I",self.params)[0];
-    def paramWord(self):
-        if len(self.params)!=2:
-            raise LRFTagException("Bad parameter for tag ID: %04X" % self.tagId)
-        return struct.unpack("<H",self.params)[0];
-    def paramSWord(self):
-        if len(self.params)!=2:
-            raise LRFTagException("Bad parameter for tag ID: %04X" % self.tagId)
-        return struct.unpack("<h",self.params)[0];
-
-#<word> count, then count <dword>s
-def Tag0B_5CParser(f):
-    cnt = getWord(f)
-    res = []
-    while cnt>0:
-        res.append(getDWord(f))
-        cnt -= 1
-    return res
-
-def DummyTagParser(f):
-    raise LRFTagException("Uknown dummy tag at %08X" % f.tell())
-
-#<word> size, then string of size bytes
-def TagStringParser(f):
-    cnt = getWord(f)
-    return unicode(f.read(cnt),"utf_16")
-
-#<dword>, then <FF16> <w:len> <len string> <w2>
-def Tag78Parser(f):
-    pos = f.tell()
-    res = []
-    res.append(getDWord(f))
-    tag = LRFTag(f)
-    if tag.tagId != 0xF516: raise LRFTagException("Bad tag 78 at %08X" % pos)
-    res.append(tag.params)
-    res.append(getWord(f))
-    return res
-
-def_tag(0x00, 6, "*ObjectStart")
-def_tag(0x01, 0, "*ObjectEnd")
-def_tag(0x02, 4, "*ObjectInfoLink")
-def_tag(0x03, 4, "*Link")
-def_tag(0x04, 4, "*StreamSize")
-def_tag(0x05, 0, "*StreamStart")
-def_tag(0x06, 0, "*StreamEnd")
-def_tag(0x07, 4)
-def_tag(0x08, 4)
-def_tag(0x09, 4)
-def_tag(0x0A, 4)
-def_tag(0x0B, Tag0B_5CParser, "*ContainedObjectsList")
-def_tag(0x0D, 2)
-def_tag(0x0E, 2)
-def_tag(0x11, 2)
-def_tag(0x12, 2)
-def_tag(0x13, 2)
-def_tag(0x14, 2)
-def_tag(0x15, 2)
-def_tag(0x16, TagStringParser)
-def_tag(0x17, 4)
-def_tag(0x18, 4)
-def_tag(0x19, 2)
-def_tag(0x1A, 2)
-def_tag(0x1B, 2)
-def_tag(0x1C, 2)
-def_tag(0x1D, 2)
-def_tag(0x1E, 2)
-def_tag(0x21, 2)
-def_tag(0x22, 2)
-def_tag(0x23, 2)
-def_tag(0x24, 2)
-def_tag(0x25, 2)
-def_tag(0x26, 2)
-def_tag(0x27, 2)
-def_tag(0x28, 2)
-def_tag(0x29, 6)
-def_tag(0x2A, 2)
-def_tag(0x2B, 2)
-def_tag(0x2C, 2)
-def_tag(0x2D, 4)
-def_tag(0x2E, 2)
-def_tag(0x31, 2)
-def_tag(0x32, 2)
-def_tag(0x33, 2)
-def_tag(0x34, 4)
-def_tag(0x35, 2)
-def_tag(0x36, 2)
-def_tag(0x37, 4)
-def_tag(0x38, 2)
-def_tag(0x39, 2)
-def_tag(0x3A, 2)
-def_tag(0x3C, 2)
-def_tag(0x3D, 2)
-def_tag(0x3E, 2)
-def_tag(0x41, 2)
-def_tag(0x42, 2)
-def_tag(0x44, 4)
-def_tag(0x45, 4)
-def_tag(0x46, 2)
-def_tag(0x47, 2)
-def_tag(0x48, 2)
-def_tag(0x49, 8)
-def_tag(0x4A, 8)
-def_tag(0x4B, 4)
-def_tag(0x4C, 4)
-def_tag(0x4D, 0)
-def_tag(0x4E, 12)
-def_tag(0x51, 2)
-def_tag(0x52, 2)
-def_tag(0x53, 4)
-def_tag(0x54, 2, "*StreamFlags")
-def_tag(0x55, TagStringParser)
-def_tag(0x56, 2)
-def_tag(0x57, 2)
-def_tag(0x58, 2)
-def_tag(0x59, TagStringParser)
-def_tag(0x5A, TagStringParser)
-def_tag(0x5B, 4)
-def_tag(0x5C, Tag0B_5CParser)
-def_tag(0x5D, TagStringParser)
-def_tag(0x5E, 2)
-def_tag(0x61, 2)
-def_tag(0x62, 0)
-def_tag(0x63, 0)
-def_tag(0x64, 0)
-def_tag(0x65, 0)
-def_tag(0x66, 0)
-def_tag(0x67, 0)
-def_tag(0x68, 0)
-def_tag(0x69, 0)
-def_tag(0x6A, 0)
-def_tag(0x6B, 0)
-def_tag(0x6C, 8)
-def_tag(0x6D, 2)
-def_tag(0x6E, 0)
-def_tag(0x71, 0)
-def_tag(0x72, 0)
-def_tag(0x73, 10)
-def_tag(0x75, 2)
-def_tag(0x76, 2)
-def_tag(0x77, 2)
-def_tag(0x78, Tag78Parser)
-def_tag(0x79, 2)
-def_tag(0x7A, 2)
-def_tag(0x7B, 4)
-def_tag(0x7C, 4, "*ParentPageTree")
-def_tag(0x81, 0)
-def_tag(0x82, 0)
-def_tag(0xA1, 4)
-def_tag(0xA2, 0)
-def_tag(0xA5, DummyTagParser)
-def_tag(0xA6, 0)
-def_tag(0xA7, 4)
-def_tag(0xA8, 0)
-def_tag(0xA9, 0)
-def_tag(0xAA, 0)
-def_tag(0xAB, 0)
-def_tag(0xAC, 0)
-def_tag(0xAD, 0)
-def_tag(0xAE, 0)
-def_tag(0xB1, 0)
-def_tag(0xB2, 0)
-def_tag(0xB3, 0)
-def_tag(0xB4, 0)
-def_tag(0xB5, 0)
-def_tag(0xB6, 0)
-def_tag(0xB7, 0)
-def_tag(0xB8, 0)
-def_tag(0xB9, 0)
-def_tag(0xBA, 0)
-def_tag(0xBB, 0)
-def_tag(0xBC, 0)
-def_tag(0xBD, 0)
-def_tag(0xBE, 0)
-def_tag(0xC1, 0)
-def_tag(0xC2, 0)
-def_tag(0xC3, 2)
-def_tag(0xC4, 0)
-def_tag(0xC5, 2)
-def_tag(0xC6, 2)
-def_tag(0xC7, 0)
-def_tag(0xC8, 2)
-def_tag(0xC9, 0)
-def_tag(0xCA, 2)
-def_tag(0xCB, DummyTagParser)
-def_tag(0xCC, 2)
-def_tag(0xD1, 12)
-def_tag(0xD2, 0)
-def_tag(0xD4, 2)
-def_tag(0xD6, 0)
-def_tag(0xD7, 14)
-def_tag(0xD8, 4)
-def_tag(0xD9, 8)
-def_tag(0xDA, 2)
-def_tag(0xDB, 2)
-def_tag(0xDC, 2)
-def_tag(0xDD, 2)
-def_tag(0xF1, 2)
-def_tag(0xF2, 4)
-def_tag(0xF3, 4)
-def_tag(0xF4, 2)
-def_tag(0xF5, 4)
-def_tag(0xF6, 4)
-def_tag(0xF7, 4)
-def_tag(0xF8, 4)
-def_tag(0xF9, 6)
--- a/src/libprs500/ebooks/lrf/meta.py
+++ b/src/libprs500/ebooks/lrf/meta.py
@ -297,7 +297,7 @@ class LRFMetaFile(object):
    version                  = field(fmt=WORD, start=0x8)
    xor_key                  = field(fmt=WORD, start=0xa)
    root_object_id           = field(fmt=DWORD, start=0xc)
-    number_of_objets         = field(fmt=QWORD, start=0x10)
+    number_of_objects        = field(fmt=QWORD, start=0x10)
    object_index_offset      = field(fmt=QWORD, start=0x18)
    binding                  = field(fmt=BYTE, start=0x24)
    dpi                      = field(fmt=WORD, start=0x26)
@ -329,7 +329,7 @@ class LRFMetaFile(object):
    # Format is %Y-%m-%d
    creation_date         = xml_field("CreationDate", parent="DocInfo") 
    producer              = xml_field("Producer", parent="DocInfo")
-    page                  = xml_field("Page", parent="DocInfo")
+    page                  = xml_field("SumPage", parent="DocInfo")
    
    def safe(func):
        """ 
@ -547,7 +547,7 @@ class LRFMetaFile(object):
        elif ttype == 0x12:
            ext = "png"
        elif ttype == 0x13:
-            ext = "bm"
+            ext = "bmp"
        return ext
        
    def fix_thumbnail_type(self):
--- a/src/libprs500/ebooks/lrf/objects.py
+++ b/src/libprs500/ebooks/lrf/objects.py
@ -12,7 +12,7 @@
 ##    You should have received a copy of the GNU General Public License along
 ##    with this program; if not, write to the Free Software Foundation, Inc.,
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-import struct, array, zlib
+import struct, array, zlib, cStringIO

 from libprs500.ebooks.lrf import LRFParseError
 from libprs500.ebooks.lrf.tags import Tag
@ -47,10 +47,32 @@ class LRFObject(object):
    @classmethod
    def parse_empdots(self, tag, f):
        self.refEmpDotsFont, self.empDotsFontName, self.empDotsCode = tag.contents
-    
-    def __init__(self, stream, id, scramble_key, boundary):
-        self._scramble_key = scramble_key
        
+    @staticmethod
+    def tag_to_val(h, obj, tag, stream):
+        if h[1] == 'D':
+            val = tag.dword
+        elif h[1] == 'W':
+            val = tag.word
+        elif h[1] == 'w':
+            val = tag.word
+            if val > 0x8000: 
+                val -= 0x10000
+        elif h[1] == 'B':
+            val = tag.byte
+        elif h[1] == 'P':
+            val = tag.contents
+        elif h[1] != '':
+            val = getattr(obj, h[1])(tag, stream)
+        if len(h) > 2:
+            val = h[2](val) if callable(h[2]) else h[2][val]
+        return val
+    
+    def __init__(self, document, stream, id, scramble_key, boundary):
+        self._scramble_key = scramble_key
+        self._document = document
+        self.id = id
+                
        while stream.tell() < boundary:
            tag = Tag(stream)
            self.handle_tag(tag, stream)
@ -60,31 +82,68 @@ class LRFObject(object):
    
    def handle_tag(self, tag, stream, tag_map=None):
        if tag_map is None:
-            tag_map = self.__class__.tag_map
+            tag_map = self.__class__.tag_map             
        if tag.id in tag_map:
            h = tag_map[tag.id]
-            if h[1] == 'D':
-                val = tag.dword
-            elif h[1] == 'W':
-                val = tag.word
-            elif h[1] == 'w':
-                val = tag.word
-                if val > 0x8000: 
-                    val -= 0x10000
-            elif h[1] == 'B':
-                val = tag.paramByte()
-            elif h[1] == 'P':
-                val = tag.contents
-            elif h[1] != '':
-                val = getattr(self, h[1])(tag, stream)
-    
+            val = LRFObject.tag_to_val(h, self, tag, stream)
            if h[1] != '' and h[0] != '':
-                if len(h) > 2:
-                    val = h[2][val]
                setattr(self, h[0], val)
        else:    
            raise LRFParseError("Unknown tag in %s: %s" % (self.__class__.__name__, str(tag)))
        
+    def __iter__(self):
+        for i in range(0):
+            yield i
+        
+    def __unicode__(self):
+        return unicode(self.__class__.__name__)
+        
+    def __str__(self):
+        return unicode(self)
+
+class LRFContentObject(LRFObject):
+    
+    tag_map = {}
+    
+    def __init__(self, bytes, objects):
+        self.stream = bytes if hasattr(bytes, 'read') else cStringIO.StringIO(bytes)
+        length = self.stream_size()
+        self.objects = objects
+        self._contents = []
+        self.current = 0
+        self.in_container = True
+        self.parse_stream(length)
+        
+    def parse_stream(self, length):    
+        while self.in_container and self.stream.tell() < length:
+            tag = Tag(self.stream)
+            self.handle_tag(tag)
+            
+    def stream_size(self):
+        pos = self.stream.tell()
+        self.stream.seek(0, 2)
+        size = self.stream.tell()
+        self.stream.seek(pos)
+        return size
+            
+    def handle_tag(self, tag):
+        if tag.id in self.tag_map:
+            action = self.tag_map[tag.id]
+            if isinstance(action, basestring):
+                func, args = action, tuple([])
+            else:
+                func, args = action[0], (action[1],)
+            getattr(self, func)(tag, *args)
+        else:
+            raise LRFParseError("Unknown tag in %s: %s" % (self.__class__.__name__, str(tag)))
+        
+    def __iter__(self):
+        for i in self._contents:
+            yield i
+        
+    
+
+    
 class LRFStream(LRFObject):
    tag_map = {
        0xF504: ['', 'read_stream_size'],
@ -94,11 +153,11 @@ class LRFStream(LRFObject):
      }
    tag_map.update(LRFObject.tag_map)
    
-    def __init__(self, stream, id, scramble_key, boundary):
+    def __init__(self, document, stream, id, scramble_key, boundary):
        self.stream = ''
        self.stream_size = 0
        self.stream_read = False
-        LRFObject.__init__(self, stream, id, scramble_key, boundary)
+        LRFObject.__init__(self, document, stream, id, scramble_key, boundary)
        
    def read_stream_size(self, tag, stream):
        self.stream_size = tag.dword
@ -130,12 +189,26 @@ class LRFStream(LRFObject):

 class PageTree(LRFObject):
    tag_map = {
-        0xF55C: ['pageList', 'P'],
+        0xF55C: ['_contents', 'P'],
      }
    tag_map.update(LRFObject.tag_map)
    
+    def __iter__(self):
+        for id in self._contents:
+            yield self._document.objects[id]

-class PageAttr(LRFObject):
+class StyleObject(object):
+    
+    def __unicode__(self):
+        s = '<%s objid="%s" stylelabel="%s" '%(self.__class__.__name__.replace('Attr', 'Style'), self.id, self.id)
+        for h in self.tag_map.values():
+            attr = h[0]
+            if hasattr(self, attr):
+                s += '%s="%s" '%(attr, getattr(self, attr))
+        s += '/>\n'
+        return s
+
+class PageAttr(StyleObject, LRFObject):
    tag_map = {
        0xF507: ['oddheaderid', 'D'],
        0xF508: ['evenheaderid', 'D'],
@ -159,26 +232,180 @@ class PageAttr(LRFObject):
    tag_map.update(LRFObject.tag_map)


+class Color(object):
+    def __init__(self, val):
+        self.b, self.g, self.r, self.a = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF
+        
+    def __unicode__(self):
+        return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b)
+    
+    def __str__(self):
+        return unicode(self)
+
+class EmptyPageElement(object):
+    def __iter__(self):
+        for i in range(0):
+            yield i
+        
+    def __str__(self):
+        return unicode(self)
+
+class PageDiv(EmptyPageElement):
+    
+    def __init__(self, pain, spacesize, linewidth, linecolor):
+        self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth
+        self.linecolor = Color(linecolor)
+        
+    def __unicode__(self):
+        return u'\n<PageDiv pain="%s" spacesize="%s" linewidth="%s" linecolor="%s" />\n'%\
+                (self.pain, self.spacesize, self.linewidth, self.color)
+        
+        
+class RuledLine(EmptyPageElement):
+    
+    linetype_map = {0x00: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted', 0x13: 'unknown13'}
+    
+    def __init__(self, linelength, linetype, linewidth, linecolor):
+        self.linelength, self.linewidth = linelength, linewidth
+        self.linetype = self.linetype_map[linetype]
+        self.linecolor = Color(linecolor)
+        
+    def __unicode__(self):
+        return u'\n<RuledLine linelength="%s" linetype="%s" linewidth="%s" linecolor="%s" />\n'%\
+                (self.linelength, self.linetype, self.linewidth, self.linecolor)
+                
+class Wait(EmptyPageElement):
+    
+    def __init__(self, time):
+        self.time = time
+        
+    def __unicode__(self):
+        return u'\n<Wait time="%d" />\n'%(self.time)
+        
+class Locate(EmptyPageElement):
+    
+    pos_map = {1:'bottomleft', 2:'bottomright',3:'topright',4:'topleft', 5:'base'}
+    
+    def __init__(self, pos):
+        self.pos = self.pos_map[pos]
+        
+    def __unicode__(self):
+        return u'\n<Locate pos="%s" />\n'%(self.pos)
+        
+class BlockSpace(EmptyPageElement):
+    
+    def __init__(self, xspace, yspace):
+        self.xsace, self.yspace = xspace, yspace
+        
+    def __unicode__(self):
+        return u'\n<BlockSpace xspace="%d" yspace="%d" />\n'%\
+                (self.xspace, self.ysapce)
+
 class Page(LRFStream):
    tag_map = {
-        0xF503: ['pageStyle', 'D'],
-        0xF50B: ['contents', 'P'],
+        0xF503: ['style_id', 'D'],
+        0xF50B: ['obj_list', 'P'],
        0xF571: ['', ''],
-        0xF57C: ['parentPageTree','D'],
+        0xF57C: ['parent_page_tree','D'],
      }
    tag_map.update(PageAttr.tag_map)
    tag_map.update(LRFStream.tag_map)
    
+    class Content(LRFContentObject):
+        tag_map = {
+           0xF503: 'link',
+           0xF54E: 'page_div',
+           0xF547: 'x_space',
+           0xF546: 'y_space',
+           0xF548: 'pos',
+           0xF573: 'ruled_line',
+           0xF5D4: 'wait',
+           0xF5D6: 'sound_stop',
+          }
+        
+        def __init__(self, bytes, objects):
+            self.in_blockspace = False
+            LRFContentObject.__init__(self, bytes, objects)
+        
+        def link(self, tag):
+            self.close_blockspace()
+            self._contents.append(self.objects[tag.dword])
+            
+        def page_div(self, tag):
+            self.close_blockspace()
+            pars = struct.unpack("<HIHI", tag.contents)
+            self._contents.append(PageDiv(*pars))
+            
+        def x_space(self, tag):
+            self.xspace = tag.word
+            self.in_blockspace = True
+            
+        def y_space(self, tag):
+            self.yspace = tag.word
+            self.in_blockspace = True
+            
+        def pos(self, tag):
+            self.pos = tag.wordself.pos_map[tag.word]
+            self.in_blockspace = True
+            
+        def ruled_line(self, tag):
+            self.close_blockspace()
+            pars = struct.unpack("<HHHI", tag.contents)
+            self._contents.append(RuledLine(*pars))
+            
+        def wait(self, tag):
+            self.close_blockspace()
+            self._contents.append(Wait(tag.word))
+            
+        def sound_stop(self, tag):
+            self.close_blockspace()
+            
+        def close_blockspace(self):
+            if self.in_blockspace:
+                if hasattr(self, 'pos'):
+                    self._contents.append(Locate(self.pos))
+                    delattr(self, 'pos')
+                else:
+                    xspace = self.xspace if hasattr(self, 'xspace') else 0
+                    yspace = self.yspace if hasattr(self, 'yspace') else 0
+                    self._contents.append(BlockSpace(xspace, yspace))
+                    if hasattr(self, 'xspace'): delattr(self, 'xspace')
+                    if hasattr(self, 'yspace'): delattr(self, 'yspace')
+    
+    @apply
+    def style():
+        def fget(self):
+            return self._document.objects[self.style_id]
+        return property(fget=fget)
+    
+    def initialize(self):
+        self.content = Page.Content(self.stream, self._document.objects)
+    
+    def __iter__(self):
+        for i in self.content:
+            yield i
+        
+    def __unicode__(self):
+        s = u'\n<Page pagestyle="%d" objid="%d">\n'%(self.style_id, self.id)
+        for i in self:
+            s += unicode(i)
+        s += '\n</Page>\n'
+        return s
+    
+    def __str__(self):
+        return unicode(self)
+    
+    

-class BlockAttr(LRFObject):
+class BlockAttr(StyleObject, LRFObject):
    tag_map = {
        0xF531: ['blockwidth', 'W'],
        0xF532: ['blockheight', 'W'],
        0xF533: ['blockrule', 'W', {0x14: "horz-fixed", 0x12: "horz-adjustable", 0x41: "vert-fixed", 0x21: "vert-adjustable", 0x44: "block-fixed", 0x22: "block-adjustable"}],
-        0xF534: ['bgcolor', 'D'],
+        0xF534: ['bgcolor', 'D', Color],
        0xF535: ['layout', 'W', {0x41: 'TbRl', 0x34: 'LrTb'}],
        0xF536: ['framewidth', 'W'],
-        0xF537: ['framecolor', 'D'],
+        0xF537: ['framecolor', 'D', Color],
        0xF52E: ['framemode', 'W', {0: 'none', 2: 'curve', 1:'square'}],
        0xF538: ['topskip', 'W'],
        0xF539: ['sidemargin', 'W'],
@ -187,25 +414,7 @@ class BlockAttr(LRFObject):
      }
    tag_map.update(LRFObject.tag_map)

-class Block(LRFStream):
-    tag_map = {
-        0xF503: ['atrId', 'D'],
-      }
-    tag_map.update(BlockAttr.tag_map)
-    tag_map.update(LRFStream.tag_map)
-
-class Header(LRFStream):
-    tag_map = {}
-    tag_map.update(LRFStream.tag_map)
-    tag_map.update(BlockAttr.tag_map)
-
-class Footer(Header):
-    pass
-
-class MiniPage(LRFObject):
-    pass
-
-class TextAttr(LRFObject):
+class TextAttr(StyleObject, LRFObject):
    tag_map = {
        0xF511: ['fontsize', 'w'],
        0xF512: ['fontwidth', 'w'],
@ -213,8 +422,8 @@ class TextAttr(LRFObject):
        0xF514: ['fontorientation', 'w'],
        0xF515: ['fontweight', 'W'],
        0xF516: ['fontfacename', 'P'],
-        0xF517: ['textcolor', 'D'],
-        0xF518: ['textbgcolor', 'D'],
+        0xF517: ['textcolor', 'D', Color],
+        0xF518: ['textbgcolor', 'D', Color],
        0xF519: ['wordspace', 'w'],
        0xF51A: ['letterspace', 'w'],
        0xF51B: ['baselineskip', 'w'],
@ -226,45 +435,384 @@ class TextAttr(LRFObject):
        0xF53E: ['columnsep', 'W'],
        0xF5DD: ['charspace', 'w'],
        0xF5F1: ['textlinewidth', 'W'],
-        0xF5F2: ['linecolor', 'D'],
+        0xF5F2: ['linecolor', 'D', Color],
      }
    tag_map.update(LRFObject.tag_map)

+
+class Block(LRFStream):
+    tag_map = {
+        0xF503: ['style_id', 'D'],
+      }
+    tag_map.update(BlockAttr.tag_map)
+    tag_map.update(TextAttr.tag_map)
+    tag_map.update(LRFStream.tag_map)
+    extra_attrs = [i[0] for i in BlockAttr.tag_map.values()]
+    extra_attrs.extend([i[0] for i in TextAttr.tag_map.values()])
+    
+    @apply
+    def style():
+        def fget(self):
+            return self._document.objects[self.style_id]
+        return property(fget=fget)
+    
+    @apply
+    def textstyle():
+        def fget(self):
+            return self._document.objects[self.textstyle_id]
+        return property(fget=fget)
+    
+    def initialize(self):
+        self.attrs = {}
+        stream = cStringIO.StringIO(self.stream)
+        tag = Tag(stream)
+        if tag.id != 0xF503:
+            raise LRFParseError("Bad block content")
+        obj = self._document.objects[tag.dword]
+        if isinstance(obj, SimpleText):
+            self.name = 'SimpleTextBlock'
+            self.textstyle_id = obj.style_id
+        elif isinstance(obj, Text):
+            self.name = 'TextBlock'
+            self.textstyle_id = obj.style_id 
+        elif isinstance(obj, Image):
+            self.name = 'ImageBlock'
+            for attr in ('x0', 'x1', 'y0', 'y1', 'xsize', 'ysize', 'refstream'):
+                self.attrs[attr] = getattr(obj, attr)
+        elif isinstance(obj, Button):
+            self.name = 'ButtonBlock'
+        else:
+            raise LRFParseError("Unexpected block type: "+obj.__class__.__name__)
+            
+        self.content = obj
+        
+        
+        for attr in self.extra_attrs:
+            if hasattr(self, attr):
+                self.attrs[attr] = getattr(self, attr)
+        
+    def __iter__(self):
+        try:
+            for i in iter(self.content):
+                yield i
+        except TypeError:
+            yield self.content
+        
+    def __unicode__(self):
+        s = u'\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id)
+        if hasattr(self, 'textstyle_id'):
+            s += 'textstyle="%d" '%(self.textstyle_id,)
+        for attr in self.attrs:
+            s += '%s="%s" '%(attr, self.attrs[attr])
+        s = s.rstrip()+'>\n'
+        if self.name != 'ImageBlock':
+            for i in self:
+                s += unicode(i)
+        s += '</%s>\n'%(self.name,)
+        return s
+        
+
+class MiniPage(LRFStream):
+    tag_map = {
+        0xF541: ['minipagewidth', 'W'],
+        0xF542: ['minipageheight', 'W'],
+      }
+    tag_map.update(LRFStream.tag_map)
+    tag_map.update(BlockAttr.tag_map)
+
 class Text(LRFStream):
    tag_map = {
-        0xF503: ['atrId', 'D'],
+        0xF503: ['style_id', 'D'],
      }
    tag_map.update(TextAttr.tag_map)
    tag_map.update(LRFStream.tag_map)
+    
+    @apply
+    def style():
+        def fget(self):
+            return self._document.objects[self.style_id]
+        return property(fget=fget)
+    
+    class Content(LRFContentObject):
+        tag_map = {
+           0xF581: ['simple_container', 'Italic'],
+           0xF582: 'end_container',
+           0xF5B1: ['simple_container', 'Yoko'],
+           0xF5B2: 'end_container',
+           0xF5B3: ['simple_container', 'Tate'],
+           0xF5B4: 'end_container',
+           0xF5B5: ['simple_container', 'Nekase'],
+           0xF5B6: 'end_container',
+           0xF5A1: 'start_para',
+           0xF5A2: 'end_para',
+           0xF5A7: 'char_button',
+           0xF5A8: 'end_container',
+           0xF5A9: ['simple_container', 'Rubi'],
+           0xF5AA: 'end_container',
+           0xF5AB: ['simple_container', 'Oyamoji'],
+           0xF5AC: 'end_container',
+           0xF5AD: ['simple_container', 'Rubimoji'],
+           0xF5AE: 'end_container',
+           0xF5B7: ['simple_container', 'Sup'],
+           0xF5B8: 'end_container',
+           0xF5B9: ['simple_container', 'Sub'],
+           0xF5BA: 'end_container',
+           0xF5BB: ['simple_container', 'NoBR'],
+           0xF5BC: 'end_container',
+           0xF5BD: ['simple_container', 'EmpDots'],
+           0xF5BE: 'end_container',
+           0xF5C1: ['simple_container', 'EmpLine'],
+           0xF5C2: 'end_container',
+           0xF5C3: 'draw_char',
+           0xF5C4: 'end_container',
+           0xF5C6: 'box',
+           0xF5C7: 'end_container',
+           0xF5CA: 'space',
+           0xF5CC: 'string',
+           0xF5D1: 'plot',
+           0xF5D2: 'cr',
+        }
+        
+        text_map = { 0x22: u'&quot;', 0x26: u'&amp;', 0x27: u'&apos;', 0x3c: u'&lt;', 0x3e: u'&gt;' }
+        linetype_map = {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted'}
+        adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'}
+        
+        def __init__(self, bytes, objects, parent=None, name=None, attrs={}):
+            self.parent = parent
+            self.name = name
+            self.attrs = attrs
+            LRFContentObject.__init__(self, bytes, objects)
+            
+        def parse_stream(self, length):
+            offset = self.stream.tell()
+            while self.in_container and offset < length:
+                buf = self.stream.getvalue()[offset:]
+                pos = buf.find('\xf5') - 1
+                if pos > 0:
+                    self.stream.seek(offset+pos)
+                    self.add_text(buf[:pos])
+                self.handle_tag(Tag(self.stream))
+                offset = self.stream.tell()
+            
+        def handle_tag(self, tag):
+            if tag.id in self.tag_map:
+                action = self.tag_map[tag.id]
+                if isinstance(action, basestring):
+                    func, args = action, tuple([])  
+                else:
+                    func, args = action[0], (action[1],)
+                getattr(self, func)(tag, *args)
+            elif tag.id in TextAttr.tag_map:
+                h = TextAttr.tag_map[tag.id]
+                val = LRFObject.tag_to_val(h, None, tag, self.stream)
+                if self.name == 'Span':
+                    if h[0] not in self.attrs:
+                        self.attrs[h[0]] = val
+                    elif val != self.attrs[h[0]]:
+                        if self._contents: self.parent._contents.append(self)
+                        Text.Content(self.stream, self.objects, self.parent, 
+                                            'Span', {h[0]: val})
+                        
+                        
+                else:
+                    Text.Content(self.stream, self.objects, self, 
+                                        'Span', {h[0]: val})
+                    
+            else:
+                raise LRFParseError('Unknown tag in text stream %s'&(tag,))
+                
+            
+        def simple_container(self, tag, name):
+            cont = Text.Content(self.stream, self.objects, parent=self, name=name)            
+            self._contents.append(cont)
+            
+        def end_container(self, *args):
+            self.in_container = False
+            if self.name == 'Span' and self._contents:
+                self.parent._contents.append(self)
+            
+        def end_to_root(self):
+            parent = self
+            while parent:
+                parent.end_container()
+                parent = parent.parent
+                
+        def root(self):
+            root = self
+            while root.parent:
+                root = root.parent
+            return root
+                
+        def start_para(self, tag):
+            self.end_to_root()
+            root = self.root()
+            root.in_container = True
+            
+            p = Text.Content(self.stream, self.objects, parent=root, name='P')
+            root._contents.append(p)
+            
+        def end_para(self, tag):
+            self.end_to_root()
+            root = self.root()
+            root.in_container = True
+            
+        def cr(self, tag):
+            self._contents.append(Text.Content('', self.objects, parent=self, name='CR'))
+            
+        def char_button(self, tag):
+            self._contents.append(Text.Content(self.stream, self.objects, parent=self, 
+                                    name='CharButton', attrs={'refobj':tag.dword}))
+        
+        def space(self, tag):
+            self._contents.append(Text.Content('', self.objects, parent=self, 
+                                name='Space', attrs={'xsize':tag.sword}))
+            
+        def string(self, tag):
+            strlen = tag.word
+            self.add_text(self.stream.read(strlen))
+            
+        def add_text(self, text):
+            s = unicode(text, "utf-16-le")
+            self._contents.append(s.translate(self.text_map))
+            
+        def plot(self, tag):
+            xsize, ysize, refobj, adjustment = struct.unpack("<HHII", tag.contents)
+            self._contents.append(Text.Content('', self.objects, self, 'Plot',
+                {'xsize': xsize, 'ysize': ysize, 'refobj':refobj, 
+                 'adjustment':self.adjustment_map[adjustment]}))
+                        
+        def draw_char(self, tag):
+            self._contents.append(Text.Content(self.stream, self.objects, self, 
+                                        'DrawChar', {'line':tag.word}))
+            
+        def box(self, tag):
+            self._contents.append(Text.Content(self.stream, self.objects, self, 
+                                        'Box', {'linetype':self.linetype_map[tag.word]}))
+            
+        def __iter__(self):
+            for i in self._contents:
+                yield i
+            
+        def __unicode__(self):
+            if self.name == 'CR': return u'<CR/>'
+            s = u''
+            if self.name is not None:
+                s += u'<'+self.name+u' '
+                for attr in self.attrs:
+                    s += u'%s="%s" '%(attr, self.attrs[attr])
+                s = s.rstrip() + u'>'
+            for i in self:
+                s += unicode(i)
+            if self.name is not None:
+                s += u'</%s>'%(self.name,)
+                if self.name in ['P', "CR"]:
+                    s += '\n'
+            return s
+        
+        def __str__(self):
+            return unicode(self)
+    
+    def initialize(self):
+        self.content = Text.Content(self.stream, self._document.objects)
+                
+    def __iter__(self):
+        for i in self.content:
+            yield i
+        
+    def __str__(self):
+        return unicode(self.content)


 class Image(LRFObject):
    tag_map = {
        0xF54A: ['', 'parse_image_rect'],
        0xF54B: ['', 'parse_image_size'],
-        0xF54C: ['ref_object_id', 'D'],      #imagestream or import
+        0xF54C: ['refstream', 'D'],      
        0xF555: ['comment', 'P'],
      }
    
    def parse_image_rect(self, tag, f):
-        self.image_rect = struct.unpack("<HHHH", tag.contents)
+        self.x0, self.y0, self.x1, self.y1 = struct.unpack("<HHHH", tag.contents)
        
    def parse_image_size(self, tag, f):
-        self.image_size = struct.unpack("<HH", tag.contents)
+        self.xsize, self.ysize = struct.unpack("<HH", tag.contents)
+        
+    @apply
+    def encoding():
+        def fget(self):
+            return self._document.objects[self.refstream].encoding
+        return property(fget=fget)
+    
+    @apply
+    def data():
+        def fget(self):
+            return self._document.objects[self.refstream].stream
+        return property(fget=fget)
+    
+    def __unicode__(self):
+        return u'<Image objid="%s" x0="%d" y0="%d" x1="%d" y1="%d" xsize="%d" ysize="%d" refstream="%d" />\n'%\
+        (self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream)
+
+class PutObj(EmptyPageElement):
+    
+    def __init__(self, x, y, refobj):
+        self.x, self.y, self.refobj = x, y, refobj
+        
+    def __unicode__(self):
+        return u'<PutObj x="%d" y="%d" refobj="%d" />'%(self.x, self.y, self.refobj)

 class Canvas(LRFStream):
    tag_map = {
        0xF551: ['canvaswidth', 'W'],
        0xF552: ['canvasheight', 'W'],
        0xF5DA: ['', 'parse_waits'],
+        0xF533: ['blockrule', 'W', {0x44: "block-fixed", 0x22: "block-adjustable"}],
+        0xF534: ['bgcolor', 'D', Color],
+        0xF535: ['layout', 'W', {0x41: 'TbRl', 0x34: 'LrTb'}],
+        0xF536: ['framewidth', 'W'],
+        0xF537: ['framecolor', 'D', Color],
+        0xF52E: ['framemode', 'W', {0: 'none', 2: 'curve', 1:'square'}],
      }
-    tag_map.update(BlockAttr.tag_map)
    tag_map.update(LRFStream.tag_map)
+    extra_attrs = ['canvaswidth', 'canvasheight', 'blockrule', 'layout', 
+                   'framewidth', 'framecolor', 'framemode']
    
    def parse_waits(self, tag, f):
        val = tag.word
        self.setwaitprop = val&0xF
        self.setwaitsync = val&0xF0
+        
+    def initialize(self):
+        self.attrs = {}
+        for attr in self.extra_attrs:
+            if hasattr(self, attr):
+                self.attrs[attr] = getattr(self, attr)
+        self._contents = []
+        stream = cStringIO.StringIO(self.stream)
+        while stream.tell() < len(self.stream):
+            tag = Tag(stream)
+            self._contents.append(PutObj(*struct.unpack("<HHI", tag.contents)))
+            
+    def __unicode__(self):
+        s = '\n<%s objid="%s" '%(self.__class__.__name__, self.id,)
+        for attr in self.attrs:
+            s += '%s="%s" '%(attr, self.attrs[attr])
+        s = s.rstrip() + '>\n'
+        for po in self:
+            s += unicode(po) + '\n'
+        s += '</%s>\n'%(self.__class__.__name__,)
+        return s
+    
+    def __iter__(self):
+        for i in self._contents:
+            yield i
+        
+class Header(Canvas):
+    pass
+
+class Footer(Canvas):
+    pass
+

 class ESound(LRFObject):
    pass
@ -273,9 +821,26 @@ class ImageStream(LRFStream):
    tag_map = {
        0xF555: ['comment', 'P'],
      }
+    imgext = {0x11: 'jpeg', 0x12: 'png', 0x13: 'bmp', 0x14: 'gif'}
+    
    tag_map.update(LRFStream.tag_map)
+    
+    @apply
+    def encoding():
+        def fget(self):
+            return self.imgext[self.stream_flags & 0xFF].upper()
+        return property(fget=fget)
+    
+    def end_stream(self, *args):
+        LRFStream.end_stream(self, *args)
+        self.file = str(self.id) + '.' + self.encoding.lower()
+        self._document.image_map[self.id] = self
+        
+    def __unicode__(self):
+        return u'<ImageStream objid="%s" encoding="%s" file="%s" />\n'%\
+            (self.id, self.encoding, self.file)

-class Import(LRFObject):
+class Import(LRFStream):
    pass

 class Button(LRFObject):
@ -300,12 +865,12 @@ class Button(LRFObject):
      }
    tag_map.update(LRFObject.tag_map)
    
-    def __init__(self, stream, id, scramble_key, boundary):
+    def __init__(self, document, stream, id, scramble_key, boundary):
        self.xml = u''
        self.refimage = {}
        self.actions = {}
        self.to_dump = True
-        LRFObject.__init__(self, stream, id, scramble_key, boundary)
+        LRFObject.__init__(self, document, stream, id, scramble_key, boundary)
    
    def do_ref_image(self, tag, f):
        self.refimage[self.button_yype] = tag.dword
@ -341,6 +906,38 @@ class Button(LRFObject):
    
    def parse_run(self, tag, f):
        self.actions[self.button_type].append((5, struct.unpack("<HI", tag.contents)))
+        
+    def jump_action(self, button_type):
+        for i in self.actions[button_type]:
+            if i[0] == 1:
+                return i[1:][0]
+    
+    def __unicode__(self):
+        s = u'<Button objid="%s">\n'%(self.id,)
+        if self.button_flags & 0x10 != 0:
+            s += '<PushButton '
+            if 2 in self.refimage:
+                s += 'refimage="%s" '%(self.refimage[2],)
+            s = s.rstrip() + '>\n'
+            s += '<JumpTo refpage="%s" refobj="%s" />\n'% self.jump_action(2)
+            s += '</PushButton>\n'
+        else:
+            raise LRFParseError('Unsupported button type')
+        s += '</Button>\n'
+        return s
+    
+    @apply
+    def refpage():
+        def fget(self):
+            return self.jump_action(2)[0]
+        return property(fget=fget)
+    
+    @apply
+    def refobject():
+        def fget(self):
+            return self.jump_action(2)[1]
+        return property(fget=fget)
+

 class Window(LRFObject):
    pass
@ -356,35 +953,87 @@ class SoundStream(LRFObject):

 class Font(LRFStream):
    tag_map = {
-        0xF559: ['fontFilename', 'P'],
-        0xF55D: ['fontFacename', 'P'],
+        0xF559: ['fontfilename', 'P'],
+        0xF55D: ['fontfacename', 'P'],
      }
    tag_map.update(LRFStream.tag_map)
+    
+    def end_stream(self, *args):
+        LRFStream.end_stream(self, *args)
+        self._document.font_map[self.fontfacename] = self
+        self.file = self.fontfacename + '.ttf'
+    
+    def __unicode__(self):
+        s = '<RegistFont objid="%s" fontfilename="%s" fontfacename="%s" encoding="TTF" file="%s" />\n'%\
+            (self.id, self.fontfilename, self.fontfacename, self.file)
+        return s

 class ObjectInfo(LRFObject):
    pass


-class BookAttr(LRFObject):
+class BookAttr(StyleObject, LRFObject):
    tag_map = {
-        0xF57B: ['pageTreeId', 'D'],
+        0xF57B: ['page_tree_id', 'D'],
        0xF5D8: ['', 'add_font'],
        0xF5DA: ['setwaitprop', 'W', {1: 'replay', 2: 'noreplay'}],
      }
    tag_map.update(LRFObject.tag_map)
+    binding_map = {1: 'Lr', 16 : 'Rl'}
    
-    def __init__(self, stream, id, scramble_key, boundary):
+    def __init__(self, document, stream, id, scramble_key, boundary):
        self.font_link_list = []
-        LRFObject.__init__(self, stream, id, scramble_key, boundary)
+        LRFObject.__init__(self, document, stream, id, scramble_key, boundary)
        
    def add_font(self, tag, f):
        self.font_link_list.append(tag.dword)
+        
+    def __unicode__(self):
+        s = u'<BookStyle objid="%s" stylelabel="%s">\n'%(self.id, self.id)
+        doc = self._document
+        s += u'<BookSetting bindingdirection="%s" dpi="%s" screenwidth="%s" screenheight="%s" colordepth="%s" />\n'%\
+        (self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth) 
+        for font in self._document.font_map.values():
+            s += unicode(font)
+        s += '</BookStyle>\n'
+        return s

-class SimpleText(LRFObject):
+class SimpleText(Text):
    pass

+class TocLabel(object):
+    
+    def __init__(self, refpage, refobject, label):
+        self.refpage, self.refobject, self.label = refpage, refobject, label
+        
+    def __unicode__(self):
+        return u'<TocLabel refpage="%s" refobj="%s">%s</TocLabel>\n'%(self.refpage, self.refobject, self.label)
+
 class TOCObject(LRFStream):
-    pass
+    
+    def initialize(self):
+        stream = cStringIO.StringIO(self.stream)
+        c = struct.unpack("<H", stream.read(2))[0]
+        stream.seek(4*(c+1))
+        self._contents = []
+        while c > 0:
+            refpage = struct.unpack("<I", stream.read(4))[0]
+            refobj  = struct.unpack("<I", stream.read(4))[0]
+            cnt = struct.unpack("<H", stream.read(2))[0]
+            label = unicode(stream.read(cnt), "utf_16")
+            self._contents.append(TocLabel(refpage, refobj, label))
+            c -= 1
+            
+    def __iter__(self):
+        for i in self._contents:
+            yield i
+        
+    def __unicode__(self):
+        s = u'<TOC>\n'
+        for i in self:
+            s += unicode(i)
+        return s + '</TOC>\n'
+            

 object_map = [
    None,       #00
@ -421,14 +1070,14 @@ object_map = [
 ]


-def get_object(stream, id, offset, size, scramble_key):
+def get_object(document, stream, id, offset, size, scramble_key):
    stream.seek(offset)
    start_tag = Tag(stream)
    if start_tag.id != 0xF500:
        raise LRFParseError('Bad object start')
    obj_id, obj_type = struct.unpack("<IH", start_tag.contents)
    if obj_type < len(object_map) and object_map[obj_type] is not None:
-        return object_map[obj_type](stream, obj_id, scramble_key, offset+size-8)
+        return object_map[obj_type](document, stream, obj_id, scramble_key, offset+size-Tag.tags[0][0])
    
    raise LRFParseError("Unknown object type: %02X!" % obj_type)

--- a/src/libprs500/ebooks/lrf/parser.py
+++ b/src/libprs500/ebooks/lrf/parser.py
@ -14,20 +14,26 @@
 ##    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 ''''''

-import sys, struct, array, os
+import sys, array, os, re, codecs, logging

-from libprs500.ebooks.lrf.meta import LRFMetaFile, LRFException
-from libprs500.ebooks.lrf.objects import get_object
+from libprs500 import __author__, __appname__, __version__, setup_cli_handlers 
+from libprs500.ebooks.lrf.meta import LRFMetaFile
+from libprs500.ebooks.lrf.objects import get_object, PageTree, StyleObject, \
+                                         Font, Text, TOCObject
+                                         

 class LRFDocument(LRFMetaFile):
    
-    def __init__(self, stream, temp_dir):
+    def __init__(self, stream):
        LRFMetaFile.__init__(self, stream)
-        self.temp_dir = os.path.abspath(os.path.expanduser(temp_dir))
        self.scramble_key = self.xor_key
-        self.parse_objects()
+        self.page_trees = []
+        self.font_map = {}
+        self.image_map = {}
+        self._parse_objects()
+        self.toc = None
        
-    def parse_objects(self):
+    def _parse_objects(self):
        self.objects = {}
        self._file.seek(self.object_index_offset)
        obj_array = array.array("I", self._file.read(4*4*self.number_of_objects))
@ -35,25 +41,101 @@ class LRFDocument(LRFMetaFile):
            obj_array.byteswap()
        for i in range(self.number_of_objects):
            objid, objoff, objsize = obj_array[i*4:i*4+3]
-            self.parse_object(objid, objoff, objsize)
-            
-    def parse_object(self, objid, objoff, objsize):
-        self.objects[objid] = get_object(self._file, objid, objoff, objsize, self.scramble_key)
+            self._parse_object(objid, objoff, objsize)
+        for obj in self.objects.values():
+            if hasattr(obj, 'initialize'):
+                obj.initialize()
+                    
+    def _parse_object(self, objid, objoff, objsize):
+        obj = get_object(self, self._file, objid, objoff, objsize, self.scramble_key)
+        self.objects[objid] = obj
+        if isinstance(obj, PageTree):
+            self.page_trees.append(obj)
+        elif isinstance(obj, TOCObject):
+            self.toc = obj
+    
+    def __iter__(self):
+        for pt in self.page_trees:
+            yield pt
+        
+    def write_files(self):
+        for obj in self.image_map.values() + self.font_map.values():
+            open(obj.file, 'wb').write(obj.stream)            
+        
+    def to_xml(self):
+        bookinfo = u'<BookInformation>\n<Info version="1.1">\n<BookInfo>\n'
+        bookinfo += u'<Title reading="%s">%s</Title>\n'%(self.title_reading, self.title)
+        bookinfo += u'<Author reading="%s">%s</Author>\n'%(self.author_reading, self.author)
+        bookinfo += u'<BookID>%s</BookID>\n'%(self.book_id,)
+        bookinfo += u'<Publisher reading="">%s</Publisher>\n'%(self.publisher,)
+        bookinfo += u'<Label reading="">%s</Label>\n'%(self.label,)
+        bookinfo += u'<Category reading="">%s</Category>\n'%(self.category,)
+        bookinfo += u'<Classification reading="">%s</Classification>\n'%(self.classification,)
+        bookinfo += u'<FreeText reading="">%s</FreeText>\n</BookInfo>\n<DocInfo>\n'%(self.free_text,)
+        th = self.thumbnail
+        if th:
+            bookinfo += u'<CThumbnail file="%s" />\n'%(self.title+'_thumbnail.'+self.thumbail_extension(),)
+            open(self.title+'_thumbnail.'+self.thumbail_extension(), 'wb').write(th)
+        bookinfo += u'<Language reading="">%s</Language>\n'%(self.language,)
+        bookinfo += u'<Creator reading="">%s</Creator>\n'%(self.creator,)
+        bookinfo += u'<Producer reading="">%s</Producer>\n'%(self.producer,)
+        bookinfo += u'<SumPage>%s</SumPage>\n</DocInfo>\n</Info>\n</BookInformation>\n'%(self.page,)
+        pages = u''
+        done_main = False
+        pt_id = -1
+        for page_tree in self:
+            if not done_main:
+                done_main = True
+                pages += u'<Main>\n'
+                close = u'</Main>\n'
+                pt_id = page_tree.id
+            else:
+                pages += u'<PageTree objid="%d">\n'%(page_tree.id,)
+                close = u'</PageTree>\n'
+            for page in page_tree:
+                pages += unicode(page)
+            pages += close
+        traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id]
+        
+        objects = u'\n<Objects>\n'
+        styles  = u'\n<Style>\n'
+        for obj in self.objects:
+            obj = self.objects[obj]
+            if obj.id in traversed_objects or isinstance(obj, (Font, Text)):
+                continue            
+            if isinstance(obj, StyleObject):
+                styles += unicode(obj)
+            else:
+                objects += unicode(obj)
+        styles += '</Style>\n'
+        objects += '</Objects>\n'
+        self.write_files()
+        return '<BBeBXylog version="1.0">\n' + bookinfo + pages + styles + objects + '</BBeBXylog>'
        
-    def get_byte(self):
-        return struct.unpack("<B", self.stream.read(1))[0]
    
-    def get_word(self):
-        return struct.unpack("<H", self.stream.read(2))[0]
-    
-    def get_dword(self):
-        return struct.unpack("<I", self.stream.read(4))[0]
-    
-    def get_qword(self):
-        return struct.unpack("<Q", self.stream.read(8))[0]
-
-def main(args=sys.argv):
-    LRFDocument(open(args[1], 'rb'), '.')
+def main(args=sys.argv, logger=None):
+    from optparse import OptionParser
+    parser = OptionParser(usage='%prog book.lrf', epilog='Created by '+__author__,
+                          version=__appname__ + ' ' + __version__)
+    parser.add_option('--output', '-o', default=None, help='Output LRS file', dest='out')
+    parser.add_option('--verbose', default=False, action='store_true', dest='verbose')
+    opts, args = parser.parse_args(args)
+    if logger is None:
+        level = logging.DEBUG if opts.verbose else logging.INFO
+        logger = logging.getLogger('lrf2lrs')
+        setup_cli_handlers(logger, level)
+    if len(args) != 2:
+        parser.print_help()
+        return 1
+    if opts.out is None:
+        opts.out = os.path.join(os.path.dirname(args[1]), os.path.splitext(os.path.basename(args[1]))[0]+".lrs")
+    o = codecs.open(os.path.abspath(os.path.expanduser(opts.out)), 'wb', 'utf-8')
+    o.write(u'<?xml version="1.0" encoding="UTF-8"?>\n')
+    logger.info('Parsing LRF...')
+    d = LRFDocument(open(args[1], 'rb'))
+    logger.info('Creating XML...')
+    o.write(d.to_xml())
+    logger.info('LRS written to '+opts.out)
    return 0

 if __name__ == '__main__':
--- a/src/libprs500/ebooks/lrf/tags.py
+++ b/src/libprs500/ebooks/lrf/tags.py
@ -199,7 +199,7 @@ class Tag(object):
        self.offset = stream.tell()
        tag_id = struct.unpack("<BB", stream.read(2))
        if tag_id[1] != 0xF5: 
-            raise LRFParseError("Bad tag ID")
+            raise LRFParseError("Bad tag ID %02X at %d"%(tag_id[1], self.offset))
        if tag_id[0] not in self.__class__.tags: 
            raise LRFParseError("Unknown tag ID: F5%02X" % tag_id[0])