From 51458628ab3109b7d676f04f4a2b6dbb87acdb78 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Mon, 17 Sep 2007 03:13:44 +0000 Subject: [PATCH] Implement order of magnitude faster lrf2lrs --- setup.py | 2 +- src/libprs500/ebooks/lrf/__init__.py | 3 + src/libprs500/ebooks/lrf/lrs/convert_to.py | 1608 -------------------- src/libprs500/ebooks/lrf/lrs/tags.py | 257 ---- src/libprs500/ebooks/lrf/meta.py | 6 +- src/libprs500/ebooks/lrf/objects.py | 793 +++++++++- src/libprs500/ebooks/lrf/parser.py | 130 +- src/libprs500/ebooks/lrf/tags.py | 2 +- 8 files changed, 835 insertions(+), 1966 deletions(-) delete mode 100644 src/libprs500/ebooks/lrf/lrs/convert_to.py delete mode 100644 src/libprs500/ebooks/lrf/lrs/tags.py diff --git a/setup.py b/setup.py index be97230552..5302fa9bc3 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ entry_points = { 'web2lrf = libprs500.ebooks.lrf.web.convert_from:main',\ 'pdf2lrf = libprs500.ebooks.lrf.pdf.convert_from:main',\ 'any2lrf = libprs500.ebooks.lrf.any.convert_from:main',\ - 'lrf2lrs = libprs500.ebooks.lrf.lrs.convert_to:main',\ + 'lrf2lrs = libprs500.ebooks.lrf.parser:main',\ 'libprs500-beta = libprs500.gui2.main:main',\ ], 'gui_scripts' : [ APPNAME+' = libprs500.gui.main:main'] diff --git a/src/libprs500/ebooks/lrf/__init__.py b/src/libprs500/ebooks/lrf/__init__.py index a111dd2d45..e5ef39573a 100644 --- a/src/libprs500/ebooks/lrf/__init__.py +++ b/src/libprs500/ebooks/lrf/__init__.py @@ -31,6 +31,9 @@ from libprs500 import iswindows __docformat__ = "epytext" +class LRFParseError(Exception): + pass + class PRS500_PROFILE(object): screen_width = 600 diff --git a/src/libprs500/ebooks/lrf/lrs/convert_to.py b/src/libprs500/ebooks/lrf/lrs/convert_to.py deleted file mode 100644 index f5ebeb4c93..0000000000 --- a/src/libprs500/ebooks/lrf/lrs/convert_to.py +++ /dev/null @@ -1,1608 +0,0 @@ -#!/usr/bin/env python - -""" -lrf2lrs v 0.4 2007-01-09 - -Copyright (c) 2006-2007 roxfan, Igor Skochinsky - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the "Software"), -to deal in the Software without restriction, including without limitation -the rights to use, copy, modify, merge, publish, distribute, sublicense, -and/or sell copies of the Software, and to permit persons to whom the -Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. -""" - -import zlib, sys, struct, array, codecs, string, StringIO, re -from types import FunctionType - -import libprs500.ebooks.lrf.lrs.tags as tags -from libprs500 import __version__, __appname__ - -class LRFException(Exception): - def __init__(self,msg): - self.msg=msg - def __str__(self): - return repr(self.msg) - -def getByte(f): - return struct.unpack("0x8000: val = val-0x10000 - elif h[1]=='B': - val = tag.paramByte() - elif h[1]=='P': - val = tag.params - elif h[1]!='': - val = getattr(obj, h[1])(tag, f) #call obj.method(tag, f) - - if h[1]!='' and h[0]!='': - setattr(obj, h[0], val) - #print str(obj)+"."+h[0]+"="+str(getattr(obj, h[0])) - return True - elif 0 in tagmap: - return recurseTagMap(obj, tagmap[0], tag, f) - elif 1 in tagmap: - for i in tagmap[1]: - res = recurseTagMap(obj, i, tag, f) - if res: return res - return - return None - -def doTagMap(obj, tagmap, tag, f): - res = recurseTagMap(obj, tagmap, tag, f) - if res: - return res - else: - raise LRFException("Unknown tag in %s: %s" % (obj.__class__.__name__,str(tag))) - -def colorToString(val): - return '0x%02X%02X%02X%02X' % (val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF) - -def recurseTagMapXml(obj, tagmap, objects): - s = '' - for tag in tagmap: - if tag>2: - h = tagmap[tag] - if len(h)<3: - raise LRFException("Don't know how to convert tag %04X!" % (tag)) - valname = h[0] - if valname=='': - mapping = h[2] - if mapping!=None: - if (type(mapping) is FunctionType): - s += mapping(obj, objects) - else: - raise LRFException("Unknown mapping type for tag %04X: %s!" % (tag, type(mapping))) - elif hasattr(obj, valname): - val = getattr(obj, valname) - mapping = h[2] - if mapping!=None: - if type(mapping) is str: - if mapping == 'C': - if val&0xFF!=0xFF: s += ' %s="%s"' % (valname, colorToString(val)) - elif mapping == 'L': - s += ' %s="%d"' % (valname, val*5) - else: - s += ' '+valname+'="'+mapping % val+'"' - elif type(mapping) is FunctionType: - s += mapping(val) - elif type(mapping) is dict: - if val in mapping: - if mapping[val]!=None: - s += ' '+valname+'="'+mapping[val]+'"' - else: - raise LRFException("Unexpected value (%s) for tag '%s' (%04X)!" % (str(val), valname, tag)) - else: - raise LRFException("Unknown mapping type for tag %04X: %s!" % (tag, type(mapping))) - elif tag==0: - s+=recurseTagMapXml(obj, tagmap[0], objects) - elif tag==1: - for i in tagmap[1]: - s += recurseTagMapXml(obj, i, objects) - else: - raise LRFException("Bad tag value: %04X!" % tag) - return s - -def doTagMapXml(obj, tagmap, objects): - res = recurseTagMapXml(obj, tagmap, objects) - return res - -def recurseTagMapXml2(obj, tagmap, objects, tag): - s = u'' - tagId = tag.tagId - if tagId in tagmap: - h = tagmap[tagId] - if len(h)<3: - raise LRFException("Don't know how to convert tag %04X!" % (tagId)) - valname = h[0] - if valname=='': - mapping = h[2] - if mapping!=None: - if (type(mapping) is FunctionType): - s += mapping(obj, objects) - else: - raise LRFException("Unknown mapping type for tag %04X: %s!" % (tag, type(mapping))) - else: - if h[1]=='D': - val = tag.paramDWord() - elif h[1]=='W': - val = tag.paramWord() - elif h[1]=='w': - val = tag.paramWord() - if val>0x8000: val = val-0x10000 - elif h[1]=='B': - val = tag.paramByte() - elif h[1]=='P': - val = tag.params - elif h[1]!='': - val = getattr(obj, h[1])(tag, f) #call obj.method(tag, f) - else: - raise LRFException("Don't know how to get value for tag %04X!" % tagId) - mapping = h[2] - if type(mapping) is str: - if mapping == 'C': - if val&0xFF!=0xFF: s += ' %s="%s"' % (valname, colorToString(val)) - else: s+=' ' - else: - s += ' '+valname+'="'+mapping % val+'"' - elif type(mapping) is FunctionType: - s += mapping(val) - elif type(mapping) is dict: - if val in mapping: - s += ' '+valname+'="'+mapping[val]+'"' - else: - raise LRFException("Unexpected value (%s) for tag '%s' (%04X)!" % (str(val), valname, tagId)) - else: - raise LRFException("Unknown mapping type for tag %04X: %s!" % (tagId, type(mapping))) - elif 0 in tagmap: - s+=recurseTagMapXml2(obj, tagmap[0], objects, tag) - elif 1 in tagmap: - for i in tagmap[1]: - s += recurseTagMapXml2(obj, i, objects, tag) - return s - -def doTagMapXml2(obj, tagmap, objects, tag): - s = recurseTagMapXml2(obj, tagmap, objects, tag) - if len(s)==0: - raise LRFException("Bad tag value: %04X!" % tag.tagId) - return s - -def descrambleBuf(buf, l, xorKey): - i = 0 - a = array.array('B',buf) - while l>0: - a[i] ^= xorKey - i+=1 - l-=1 - return a.tostring() - -def nullfunc(obj, objects): return '' - -class LRFObject: - tagMap = { - 0xF500: ['', '', None], - 0xF502: ['infoLink', 'D', None], - 0xF501: ['','', None] - } - def __init__(self, objId): - self.objId = objId - self.toDump = True - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def addXmlTags(self, objects): - res = u' objid="%d"' % self.objId - if self.__class__.__name__ == "LRFImageStream": - label = "imagestreamlabel" - elif self.__class__.__name__ == "LRFPopUpWin": - label = "popupwinlabel" - elif self.__class__.__name__ == "LRFWindow": - label = "windowlabel" - elif self.__class__.__name__ == "LRFESound": - label = "esoundlabel" - elif self.__class__.__name__ == "LRFHeader": - label = "headerlabel" - elif self.__class__.__name__ == "LRFFooter": - label = "footerlabel" - elif self.__class__.__name__[-3:] == "Atr": - label = "" - else: - label = "objlabel" - if label!="": - res += ' %s="%s.%d"' % (label, self.__class__.__name__[3:], self.objId) - if hasattr(self,'toclabel'): - res += ' toclabel="%s"' % self.toclabel - return res - def __str__(self): - return self.__class__.__name__+": %04X" % self.objId - - -class LRFStream(LRFObject): - tagMap = { - 0xF504: ['', 'doStreamSize'], - 0xF554: ['streamFlags', 'W'], - 0xF505: ['','readStream'], - 0xF506: ['','endStream'], - 0: LRFObject.tagMap - } - def __init__(self, objId): - LRFObject.__init__(self, objId) - self.stream='' - self.streamSize=0 - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def endStream(self, tag, f): - gotEndStream = True - def doStreamSize(self, tag, f): - self.streamSize = tag.paramDWord() - if self.streamSize == 0: self.stream='' - def readStream(self, tag, f): - if hasattr(self,'gotEndStream') and self.gotEndStream: - raise LRFException("There can be only one stream per object!") - if not hasattr(self, 'streamSize'): - raise LRFException("Stream size was not defined!") - if not hasattr(self, 'streamFlags'): - raise LRFException("Stream flags were not defined!") - self.stream = f.read(self.streamSize) - if self.streamFlags & 0x200 !=0: - l = len(self.stream); - key = l % self.scrambleKey + 0xF; - if l>0x400 and (isinstance(self,LRFImageStream) or isinstance(self,LRFFont) or isinstance(self,LRFSoundStream)): - l = 0x400; - #print "Descrambling %X bytes with key %X" % (l, key) - self.stream = descrambleBuf(self.stream, l, key) - if self.streamFlags & 0x100 !=0: - decompSize = struct.unpack(" %X bytes" % (len(self.stream)-4, decompSize) - self.stream = zlib.decompress(self.stream[4:]) - if len(self.stream)!=decompSize: - raise LRFException("Stream decompressed size is wrong!") - off = f.tell() - next = f.read(2) - if next!='\x06\xF5': - print "Warning: corrupted end-of-stream tag at %08X; skipping it"%off - self.endStream(0,0) - -#01 -class LRFPageTree(LRFObject): - tagMap = { - 0xF55C: ['pageList', 'P'], - 0: LRFObject.tagMap - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def toXml(self, objects, main=False): - self.toDump = False - if main: - print "Writing main pages...", - res = u"
\n" - else: - res = u'\n' % self.objId - for i in self.pageList: - #print "Page id=%X"% i - res+=objects[i].toXml(objects) - if main: - res += u"
\n" - print "done." - else: - res += u"\n" - return res; - -def bgImageToString(self, objects): - modemap = {0: 'fix', 1: 'fix', 2: 'tile', 3: 'centering'} - s = '' - if hasattr(self,'bgImageMode'): - s += ' bgimagemode="%s"' % modemap[getattr(self,'bgImageMode')] - if hasattr(self,'bgImageId'): - refid = getattr(self,'bgImageId') - if refid>0: s += ' refbgimage="%d"' % refid - return s - -def parseBgImage(self, tag, f): - self.bgImageMode, self.bgImageId = struct.unpack("\n" - return res; - -class LRFPageContent: - tagMap = { - 0xF503: ['', 'doContained'], - 0xF54E: ['', 'doPageDiv'], - 0xF547: ['', 'doXSpace'], - 0xF546: ['', 'doYSpace'], - 0xF548: ['', 'doPos'], - 0xF573: ['', 'doRuledLine'], - 0xF5D4: ['', 'doWait'], - 0xF5D6: ['', 'doSoundStop'], - } - maplinetype = {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted', 0x13: 'unknown13'} - - def __init__(self, objects): - self.xml = u'' - self.objects = objects - self.inBlockspace = False - def handleTag(self, tag, f): - #print "LRFPageContent:", tag - return doTagMap(self, self.tagMap, tag, f) - def closeBlockspace(self): - if self.inBlockspace: - self.xml += self.getBlockSpace() - if hasattr(self,'xspace'): delattr(self,'xspace') - if hasattr(self,'yspace'): delattr(self,'yspace') - if hasattr(self,'pos'): delattr(self,'pos') - self.inBlockspace = False - def doSimpleTag(self, tag, f): - self.closeBlockspace() - self.xml += self.tagMap[tag.tagId][2] - def doContained(self, tag, f): - self.closeBlockspace() - self.xml += self.objects[tag.paramDWord()].toXml(self.objects) - def doPageDiv(self, tag, f): - self.closeBlockspace() - pars = struct.unpack("\n' % (pars[0], pars[1], pars[2], colorToString(pars[3])) - def doRuledLine(self, tag, f): - self.closeBlockspace() - pars = struct.unpack("\n' % (pars[0], self.maplinetype[pars[1]], pars[2], colorToString(pars[3])) - def doXSpace(self, tag, f): - self.xspace = tag.paramWord() - self.inBlockspace = True - def doPos(self, tag, f): - posmap = {1:'bottomleft', 2:'bottomright',3:'topright',4:'topleft', 5:'base'} - self.pos = posmap[tag.paramWord()] - self.inBlockspace = True - def doYSpace(self, tag, f): - self.yspace = tag.paramWord() - self.inBlockspace = True - def getBlockSpace(self): - if hasattr(self,'pos'): - res = u'\n' % tag.paramWord() - def doSoundStop(self, tag, f): - self.closeBlockspace() - self.xml += u'\n' - def toXml(self, objects): - self.closeBlockspace() - return self.xml - -#02 -class LRFPage(LRFStream): - tagMap = { - 0xF503: ['pageStyle', 'D'], - 0xF50B: ['contents', 'P'], - 0xF571: ['',''], - 0xF57C: ['parentPageTree','D'], - 1: [LRFPageAtr.tagMap, LRFStream.tagMap] - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def toXml(self, objects): - self.toDump = False - res = u'\n' - f = StringIO.StringIO(self.stream) - l = len(self.stream) - cont = LRFPageContent(objects) - while f.tell()\n' - return res - -#07 -class LRFBlockAtr(LRFObject): - tagMap = { - 0xF531: ['blockwidth', 'W', '%d'], - 0xF532: ['blockheight', 'W', '%d'], - 0xF533: ['blockrule', 'W', {0x14: "horz-fixed", 0x12: "horz-adjustable", 0x41: "vert-fixed", 0x21: "vert-adjustable", 0x44: "block-fixed", 0x22: "block-adjustable"}], - 0xF534: ['bgcolor', 'D', 'C'], - 0xF535: ['layout', 'W', {0x41: 'TbRl', 0x34: 'LrTb'}], - 0xF536: ['framewidth', 'W', '%d'], - 0xF537: ['framecolor', 'D', 'C'], - 0xF52E: ['framemode', 'W', {0: None, 2: 'curve',1:'square'}], - 0xF538: ['topskip', 'W', '%d'], - 0xF539: ['sidemargin', 'W', '%d'], - 0xF53A: ['footskip', 'W', '%d'], - 0xF529: ['', parseBgImage, bgImageToString], - 0: LRFObject.tagMap - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def toXml(self, objects): - self.toDump = False - res = u'\n" - return res; - -#03 -class LRFHeader(LRFStream): - tagMap = { - 1: [LRFBlockAtr.tagMap, LRFStream.tagMap] - } - contentTags = { - 0xF549: ['','doPutObj'] - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def doPutObj(self, tag, f): - pars = struct.unpack("\n' % pars - def toXml(self, objects): - #print "in LRFText.toXml" - if self.__class__.__name__ == "LRFHeader": - res = u'\n' - self.toDump = False - f = StringIO.StringIO(self.stream) - l = len(self.stream) - self.xml = u'' - while f.tell()\n' - else: - res += u'\n' - return res - - -#04 -class LRFFooter(LRFHeader): - pass - -#08 -class LRFMiniPage(LRFStream): - tagMap = { - 0xF541: ['minipagewidth', 'W', "%d"], - 0xF542: ['minipageheight', 'W', "%d"], - 1: [LRFBlockAtr.tagMap, LRFStream.tagMap] - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def toXml(self, objects): - self.toDump = False - res = u'\n" - f = StringIO.StringIO(self.stream) - l = len(self.stream) - cont = LRFPageContent(objects) - while f.tell()\n' - return res - -#06 -class LRFBlock(LRFStream): - tagMap = { - 0xF503: ['atrId', 'D'], - 1: [LRFBlockAtr.tagMap, LRFStream.tagMap] - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - - def addXmlTags(self, objects): - res = doTagMapXml(self, LRFBlockAtr.tagMap, objects) - res += ' blockstyle="%d"' % self.atrId - res += LRFObject.addXmlTags(self, objects) - return res - - def getLinkedObjectId(self): - tag = tags.LRFTag(self.stream) - if tag.tagId != 0xF503: - raise LRFException("Bad block content") - return tag.paramDWord() - - def toXml(self, objects): - self.toDump = False - tag = tags.LRFTag(self.stream) - if tag.tagId != 0xF503: - raise LRFException("Bad block content") - obj = objects[tag.paramDWord()] - obj.toDump = False - if isinstance(obj, LRFSimpleText): - name = 'SimpleTextBlock' - elif isinstance(obj, LRFText): - name = 'TextBlock' - elif isinstance(obj, LRFImage): - name = 'ImageBlock' - elif isinstance(obj, LRFButton): - name = 'ButtonBlock' - else: - raise LRFException("Unexpected block type: "+obj.__class__.__name__) - res = u'<%s' % name - res += obj.addXmlTags(objects) - res += self.addXmlTags(objects) - res += u'>\n' - res += obj.toXml(objects) - res += u'\n' % name - return res - -#0C -class LRFImage(LRFObject): - tagMap = { - 0xF54A: ['', 'parseImageRect'], - 0xF54B: ['', 'parseImageSize'], - 0xF54C: ['refObjectId', 'D'], #imagestream or import - 0xF555: ['comment', 'P'], - 0: LRFObject.tagMap - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def parseImageRect(self, tag, f): - self.imageRect = struct.unpack("\n' + self.comment + u'\n' - else: - res += u'/>\n' - return res - else: - if hasattr(self,'comment'): - return self.comment - else: - return u'' - -class LRFCanvasContent: - tagMap = { - 0xF549: ['', 'doPutObj'], - } - def __init__(self, objects): - self.xml = u'' - self.objects = objects - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def doSimpleTag(self, tag, f): - self.closeSpan() - self.xml += self.tagMap[tag.tagId][2] - def doPutObj(self, tag, f): - self.xml += u'\n' % struct.unpack("\n' - f = StringIO.StringIO(self.stream) - l = len(self.stream) - cont = LRFCanvasContent(objects) - while f.tell()\n' - return res - - -def rubyAlignAndAdjustToString(rubyAlignAndAdjust): - adj = ""; - if rubyAlignAndAdjust&0xF0 == 0x10: - adj = "line-edge" - elif rubyAlignAndAdjust&0xF0 == 0: - adj = "none" - else: - adj = "bad rubyadjust(0x%X)" % rubyAlignAndAdjust&0xF0 - - if rubyAlignAndAdjust&0xF == 1: - align = "start" - elif rubyAlignAndAdjust&0xF == 2: - align = "center" - else: - align = "bad rubyalign(0x%X)" % rubyAlignAndAdjust&0xF - - return u' rubyalign="%s" rubyadjust="%s"' % (align, adj) - -def empDotsToString(self, objects): - res = u'' - if hasattr(self,'refEmpDotsFont') and self.refEmpDotsFont!=0: - res += u' refempdotsfont="%d"' % self.refEmpDotsFont - if hasattr(self,'empDotsFontName') and self.empDotsFontName!="": - res += u' empdotsfontname="%s"' % self.empDotsFontName - if hasattr(self,'empDotsCode') and self.empDotsCode!=0: - res += u' empdotscode="0x%04x"' % self.empDotsCode - return res - -rubyTags = { - 0xF575: ['rubyAlignAndAdjust', 'W', rubyAlignAndAdjustToString], - 0xF576: ['rubyoverhang', 'W', {0: 'none', 1:'auto'}], - 0xF577: ['empdotsposition', 'W', {1: 'before', 2:'after'}], - 0xF578: ['','parseEmpDots', empDotsToString], - 0xF579: ['emplineposition', 'W', {1: 'before', 2:'after'}], - 0xF57A: ['emplinetype', 'W', {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted'}] -} - -def addRubyXmlTags(self, objects): - return doTagMapXml(self, rubyTags, objects) - -#0B -class LRFTextAtr(LRFObject): - tagMap = { - 0xF511: ['fontsize', 'w', "%d"], - 0xF512: ['fontwidth', 'w', "%d"], - 0xF513: ['fontescapement', 'w', "%d"], - 0xF514: ['fontorientation', 'w', "%d"], - 0xF515: ['fontweight', 'W', "%d"], - 0xF516: ['fontfacename', 'P', "%s"], - 0xF517: ['textcolor', 'D', 'C'], - 0xF518: ['textbgcolor', 'D', 'C'], - 0xF519: ['wordspace', 'w', "%d"], - 0xF51A: ['letterspace', 'w', "%d"], - 0xF51B: ['baselineskip', 'w', "%d"], - 0xF51C: ['linespace', 'w', "%d"], - 0xF51D: ['parindent', 'w', "%d"], - 0xF51E: ['parskip', 'w', "%d"], - 0xF53C: ['align', 'W', {1: 'head', 4: 'center', 8: 'foot'}], - 0xF53D: ['column', 'W', "%d"], - 0xF53E: ['columnsep', 'W', "%d"], - 0xF5DD: ['charspace', 'w', "%d"], - 0xF5F1: ['textlinewidth', 'W', "L"], - 0xF5F2: ['linecolor', 'D', 'C'], - 1: [rubyTags, LRFObject.tagMap] - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def parseEmpDots(self, tag, f): - self.refEmpDotsFont, self.empDotsFontName, self.empDotsCode = tag.params; - def toXml(self, objects): - self.toDump = False - res = u'\n" - return res; - -class LRFTextContent: - tagMap = { - 0xF581: ['', 'doSimpleTag', u''], - 0xF582: ['', 'doSimpleTag', u''], - 0xF5B1: ['', 'doSimpleTag', u''], - 0xF5B2: ['', 'doSimpleTag', u''], - 0xF5B3: ['', 'doSimpleTag', u''], - 0xF5B4: ['', 'doSimpleTag', u''], - 0xF5B5: ['', 'doSimpleTag', u''], - 0xF5B6: ['', 'doSimpleTag', u''], - 0xF5A1: ['', 'doBeginP'], - 0xF5A2: ['', 'doEndP'], - 0xF5A7: ['', 'doBeginCharButton'], - 0xF5A8: ['', 'doSimpleTag', u''], - 0xF5A9: ['', 'doSimpleTag', u''], - 0xF5AA: ['', 'doSimpleTag', u''], - 0xF5AB: ['', 'doSimpleTag', u''], - 0xF5AC: ['', 'doSimpleTag', u''], - 0xF5AD: ['', 'doSimpleTag', u''], - 0xF5AE: ['', 'doSimpleTag', u''], - 0xF5B7: ['', 'doSimpleTag', u''], - 0xF5B8: ['', 'doSimpleTag', u''], - 0xF5B9: ['', 'doSimpleTag', u''], - 0xF5BA: ['', 'doSimpleTag', u''], - 0xF5BB: ['', 'doSimpleTag', u''], - 0xF5BC: ['', 'doSimpleTag', u''], - 0xF5BD: ['', 'doSimpleTag', u''], - 0xF5BE: ['', 'doSimpleTag', u''], - 0xF5C1: ['', 'doBeginEL'], - 0xF5C2: ['', 'doEndEL'], - 0xF5C3: ['', 'doBeginDrawChar'], - 0xF5C4: ['', 'doSimpleTag', ''], - 0xF5C6: ['', 'doBeginBox'], - 0xF5C7: ['', 'doSimpleTag', ''], - 0xF5CA: ['', 'doSpace'], - 0xF5CC: ['', 'doString'], - 0xF5D1: ['', 'doPlot'], - 0xF5D2: ['', 'doSimpleTag', u'\n'], - } - maplinetype = {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted'} - mapadjustment = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'} - - def __init__(self, objects): - self.xml = u'' - self.objects = objects - self.inSpan = False - self.inSpanBegin = False - self.inSpanEnd = False - self.inPSpan = False - self.spanChanges = [] - def doSpanTag(self, tag, f): - #if self.inSpanEnd: - # if self.inSpan: self.inSpan = False - # if tag.tagId in self.spanChanges: - # self.spanChanges.remove(tag.tagId) - # return - # else: - # self.xml += u'' - # self.inSpanEnd = False - if self.inSpan: - self.xml += u'' - self.inSpan = False - # self.inSpanEnd = True - text = doTagMapXml2(self, LRFTextAtr.tagMap, self.objects, tag) - if not self.inSpanBegin: - self.spanText = u'' - r = re.compile(r' (\w+)=".*?"(.*?)\1(=".*?")') - m = re.search(r, self.spanText) - while m: - #self.xml += "\n*** before: "+self.spanText+"***\n" - self.spanText = self.spanText[:m.start()] + m.group(2) + m.group(1) + m.group(3) + self.spanText[m.end():] - #self.xml += "\n*** after: "+self.spanText+"***\n" - m = re.search(r, self.spanText) - if add: - self.xml += self.spanText - self.spanText = u'' - self.inSpanBegin = False - self.inSpan = True - - def handleTag(self, tag, f): - if tag.tagId in self.tagMap: - return doTagMap(self, self.tagMap, tag, f) - else: - self.doSpanTag(tag, f) - return True - - def doSimpleTag(self, tag, f): - self.closeSpan() - self.xml += self.tagMap[tag.tagId][2] - - def doSpace(self, tag, f): - self.closeSpan() - self.xml += u''%tag.paramSWord() - - def doPlot(self, tag, f): - self.closeSpan() - pars = struct.unpack("'%self.mapadjustment[pars[3]] - - def doBeginDrawChar(self, tag, f): - self.closeSpan() - self.doOpenTag(tag, f, u'DrawChar line="%d"'%(tag.paramWord())) - - - def doOpenTag(self, tag, f, name): - if self.inSpanBegin: - self.closeSpan(False) - self.spanText=u'<'+name+self.spanText[5:] - self.xml += self.spanText - self.spanText = u'' - self.inPSpan = True - self.inSpan = False - elif self.inSpan: - raise LRFError("bad stuff happened") - self.xml += u'' - self.inSpan = False - else: - self.xml += u'<%s>'%name - - def doCloseTag(self, tag, f, name): - if self.inSpanBegin: - self.inSpanBegin = False - self.spanText = u'' - else: - self.closeSpan() - if self.inSpan: - self.xml += u'' - self.inSpan = False - if self.inPSpan: - self.inPSpan = False - self.inSpan = False - self.xml += u'\n'%name - - def doBeginP(self, tag, f): - self.doOpenTag(tag, f, u"P") - - def doEndP(self, tag, f): - self.doCloseTag(tag, f, u"P") - - def doBeginEL(self, tag, f): - self.doOpenTag(tag, f, u"EmpLine") - - def doEndEL(self, tag, f): - self.doCloseTag(tag, f, u"EmpLine") - - def doBeginCharButton(self, tag, f): - self.closeSpan() - self.xml += u'' % tag.paramDWord() - def doBeginBox(self, tag, f): - self.closeSpan() - self.xml += u'' % self.maplinetype[tag.paramWord()] - def doString(self, tag, f): - self.closeSpan() - strlen = tag.paramWord() - self.addText(f.read(strlen)) - def addText(self, text): - self.closeSpan() - mapping = { 0x22: u'"', 0x26: u'&', 0x27: u''', 0x3c: u'<', 0x3e: u'>' } - s = unicode(text,"utf-16-le") - self.xml += s.translate(mapping) - def toXml(self, objects): - self.closeSpan() - return self.xml - -#0A -class LRFText(LRFStream): - tagMap = { - 0xF503: ['atrId', 'D'], - 1: [LRFTextAtr.tagMap, LRFStream.tagMap] - } - def parseEmpDots(self, tag, f): - self.refEmpDotsFont, self.empDotsFontName, self.empDotsCode = tag.params; - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def addXmlTags(self, objects): - res = u'' - #print "in LRFText.addXmlTags" - if hasattr(self, 'atrId'): - #print "atrId=%d" % self.atrId - res += u' textstyle="%d"' % self.atrId - else: - raise LRFException("no textstyle defined!") - res += doTagMapXml(self, LRFTextAtr.tagMap, objects) - return res - - def toXml(self, objects): - #print "in LRFText.toXml" - res = u'' - #res += u'' % self.objId - self.toDump = False - f = StringIO.StringIO(self.stream) - l = len(self.stream) - cont = LRFTextContent(objects) - while f.tell()' - return res - -#0E -class LRFESound(LRFObject): - tagMap = { - 0xF553: ['refstream', 'D', "%d"], #refstream - 0: LRFObject.tagMap - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def toXml(self, objects, noblock=False): - self.toDump = False - res = u'\n' - return res - -imgext = {0x11: 'jpeg', 0x12: 'png', 0x13: 'bmp', 0x14: 'gif'} - -#11 -class LRFImageStream(LRFStream): - tagMap = { - 0xF555: ['comment', 'P'], - 1: [LRFStream.tagMap] - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def toXml(self, objects): - self.toDump = False - ext = imgext[self.streamFlags & 0xFF] - fname = 'imagestream_%d.%s' % (self.objId, ext) - file(fname,'wb').write(self.stream) - res = u'\n' + self.comment + u'\n' - else: - res += u'/>\n' - return res - -#12 -class LRFImport(LRFStream): - tagMap = { - 0xF50E: ['importtype', 'W'], - 0: LRFStream.tagMap - } - xmlMap = { - 0xF50E: ['importtype', 'W', {0x11: 'ImageStream', 0x17: 'SoundStream'}] - } - importTags = { - 0xF556: ['', 'doObjLink'], - 0xF50D: ['', 'doFileLink'] - } - acctypemap = {1: 'url', 2:'cid', 3:'pass'} - - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def addText(self, text): - mapping = { 0x22: u'"', 0x26: u'&', 0x27: u''', 0x3c: u'<', 0x3e: u'>' } - s = unicode(text,"utf-16-le") - self.xml += s.translate(mapping) - - def doObjLink(self, tag, f): - acctype = tag.paramWord() - if f.tell()\n' - self.toDump = False - f = StringIO.StringIO(self.stream) - self.streamlen = len(self.stream) - self.xml = u'' - while f.tell()\n' - return res - -#13 -class LRFButton(LRFObject): - tagMap = { - 0xF503: ['', 'doRefImage'], - 0xF561: ['buttonFlags','W'], #\n' - return res - -#08 -class LRFWindow(LRFStream): - tagMap = { - 0xF5DB: ['windowwidth', 'W', "%d"], - 0xF5DC: ['windowheight', 'W', "%d"], - 0xF5DA: ['setwaitprop', 'W', {1: 'replay', 2: 'noreplay'}], - 1: [LRFBlockAtr.tagMap, LRFStream.tagMap] - } - xmlMap = { - 0xF5DB: ['windowwidth', 'W', "%d"], - 0xF5DC: ['windowheight', 'W', "%d"], - 0xFF00: ['setwaitprop', 'W', {1: 'replay', 2: 'noreplay'}], - 0xFF01: ['setwaitsync', 'W', {0: 'sync', 0x10: 'async'}] - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def toXml(self, objects): - self.toDump = False - res = u'\n" - f = StringIO.StringIO(self.stream) - l = len(self.stream) - cont = LRFPageContent(objects) - while f.tell()\n' - return res - -#15 -class LRFPopUpWin(LRFObject): - tagMap = { - 0xF503: ['refBlockId', 'D'], - 0: LRFObject.tagMap - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def toXml(self, objects): - #print "in LRFSound.toXml" - self.toDump = False - res = u'\n' - res += objects[self.refBlockId].toXml(objects) - res += u'\n' - return res -#16 -class LRFSound(LRFObject): - tagMap = { - 0xF557: ['times', 'W', "%d"], - 0xF558: ['playmode', 'W', {0:'sync', 1:'async'}], - 0xF54C: ['refstream', 'D', "%d"], #stream or import - 0: LRFObject.tagMap - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def toXml(self, objects): - #print "in LRFSound.toXml" - self.toDump = False - res = u'\n' - return res - -#17 -class LRFSoundStream(LRFStream): - tagMap = { - 1: [LRFStream.tagMap] - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def toXml(self, objects): - self.toDump = False - #print "in LRFSoundStream.toXml" - sndext = {0x23: 'pcm', 0x21: 'mp3', 0x24:'atrac'} - ext = sndext[self.streamFlags & 0xFF] - fname = 'soundstream_%d.%s' % (self.objId, ext) - file(fname,'wb').write(self.stream) - res = u'\n' - return res - -#18 -class LRFFont(LRFStream): - tagMap = { - 0xF559: ['fontFilename', 'P'], - 0xF55D: ['fontFacename', 'P'], - 1: [LRFStream.tagMap] - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def toXml(self, objects): - self.toDump = False - return u'\n'%(self.fontFacename, self.fontFilename, self.fontFilename) - -#1A -class LRFObjectInfo(LRFStream): - tagMap = { - 1: [LRFStream.tagMap] - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def toXml(self, objects): - self.toDump = False - return u'' - -#1D -class LRFSimpleText(LRFText): - pass - """tagMap = { - 0: LRFText.tagMap - } - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f)""" - -def bindingToString(binding): - if binding==1: - return "Lr" - elif binding==16: - return "Rl" - else: return "bad binding(%d)" % binding - -#1C -class LRFBookAtr(LRFObject): - tagMap = { - 0xF57B: ['pageTreeId', 'D', lambda val: ''], - 0xF5D8: ['', 'addRFont', nullfunc], - 0xF5DA: ['setwaitprop', 'W', {1: 'replay', 2: 'noreplay'}], - 1: [rubyTags, LRFObject.tagMap] - } - def __init__(self, objId): - self.FontLinkList = [] - LRFObject.__init__(self, objId) - def handleTag(self, tag, f): - return doTagMap(self, self.tagMap, tag, f) - def parseEmpDots(self, tag, f): - self.refEmpDotsFont, self.empDotsFontName, self.empDotsCode = tag.params - def addRFont(self, tag, f): - self.FontLinkList.append(tag.paramDWord()) - def toXml(self, objects, lrffile): - self.toDump = False - res = u'\n\n' - res += '\n' \ - % (bindingToString(lrffile.binding), lrffile.dpi, lrffile.width, lrffile.height, lrffile.colorDepth) - for i in self.FontLinkList: - res += objects[i].toXml(objects) - res += u'\n' - return res - -#1E -class LRFTOCObject(LRFStream): - def handleTag(self, tag, f): - return LRFStream.handleTag(self, tag, f) - def toXml(self, objects): - self.toDump = False - res = u'\n' - f = StringIO.StringIO(self.stream) - l = len(self.stream) - c = getWord(f) - f.seek(4*(c+1)) - while c>0: - #ImageBlock108label - refpage = getDWord(f) - refobj = getDWord(f) - label = getString(f) - res += u'%s\n' % (refobj, refpage, label) - objects[refobj].toclabel = label - c -= 1 - res += u"\n" - return res - -def LRFObjectFactory(f, offset, size, key): - objMap = [ - None, #00 - LRFPageTree, #01 - LRFPage, #02 - LRFHeader, #03 - LRFFooter, #04 - LRFPageAtr, #05 - LRFBlock, #06 - LRFBlockAtr, #07 - LRFMiniPage, #08 - None, #09 - LRFText, #0A - LRFTextAtr, #0B - LRFImage, #0C - LRFCanvas, #0D - LRFESound, #0E - None, #0F - None, #10 - LRFImageStream,#11 - LRFImport, #12 - LRFButton, #13 - LRFWindow, #14 - LRFPopUpWin, #15 - LRFSound, #16 - LRFSoundStream,#17 - None, #18 - LRFFont, #19 - LRFObjectInfo, #1A - None, #1B - LRFBookAtr, #1C - LRFSimpleText, #1D - LRFTOCObject, #1E - ] - - f.seek(offset) - startTag = tags.LRFTag(f) - if startTag.tagId!=0xF500: raise LRFException("Bad object start!") - objId, objType = struct.unpack("800): - self.thumbType = getWord(f) - self.thumbSize = getDWord(f) - if (self.DocInfoCompSize): - uncompSize = getDWord(f) - try: - self.docInfo = zlib.decompress(f.read(self.DocInfoCompSize-4)) - except zlib.error: - raise LRFException("decompression failed"); - if len(self.docInfo)!=uncompSize: - raise LRFException("expected %d, got %d decompressed bytes" % (uncompSize, len(self.docInfo))); - if (self.version>800 and self.thumbSize>0): - self.thumbname = self.filename[:-4]+"_thumb."+imgext[self.thumbType] - file(self.thumbname,'wb').write(f.read(self.thumbSize)) - - def parseObjects(self,f): - print "Parsing objects...", - if (self.offObjectTable): - f.seek(self.offObjectTable) - obj_array = array.array("I",f.read(4*4*self.nObjects)) - if ord(array.array("i",[1]).tostring()[0])==0: #big-endian - obj_array.byteswap() - self.objects = {} - for i in range(self.nObjects): - objid, objoff, objsize = obj_array[i*4:i*4+3] - obj = LRFObjectFactory(f, objoff, objsize, self.xorKey) - #print obj - self.objects[objid] = obj - print "done." - else: - raise LRFExceptions("Call parseHeader() first!") - - def parse(self,f): - f.seek(0); - self.parseHeader(f) - self.parseObjects(f) - - def getDocInfo(self): - r=unicode(self.docInfo,"utf-16-le") - r=re.sub(r"<\?xml.*?>","",r) - r=string.replace(r,"","") - r=string.replace(r,"","") - if (self.version>800 and self.thumbSize>0): - r=string.replace(r,"",'\n' % self.thumbname) - return r - - def toXml(self): - xml = u""" - - -""" - xml += self.getDocInfo() - #xml += self.objects[self.TocObjId].toXml(self.objects) - xml += u"\n" - rootObj = self.objects[self.rootObjectId] #should be a BookAtr - pageTree = self.objects[rootObj.pageTreeId] - xml += pageTree.toXml(self.objects, True) - xml += u'\n' - xml += u"\n" - - hadSolo = False - for i in self.objects: - o = self.objects[i] - if isinstance(o,LRFPageTree) and o.toDump: - if not hadSolo: - print "Writing solo pages...", - hadSolo = True - xml += u"\n" - xml += o.toXml(self.objects) - - if hadSolo: - xml += u"" - print "done." - - xml += u'\n' - print "Writing external streams...", - for i in self.objects: - o = self.objects[i] - if o.toDump == False: - continue - if isinstance(o,LRFImage) or isinstance(o,LRFButton): - #print "object id=%02X" % o.objId - xml += o.toXml(self.objects, True) - #if isinstance(o,LRFImageStream) or isinstance(o,LRFSoundStream) or isinstance(o,LRFSound) or isinstance(o,LRFPopUpWin): - else: - #print "object id=%02X" % o.objId - xml += o.toXml(self.objects) - print "done." - xml += u'\n' - xml += u"" - return xml - - def __str__(self): - s = "Version: %d\nXor key: %02X\nRoot object: %X\nObjects: %d\nObject table offset: %X\n" \ - % (self.version, self.xorKey, self.rootObjectId, self.nObjects, self.offObjectTable) - s += "Flags: %02X\nSize: %dx%d\nToc object: %X\nToc offset: %X" \ - % (self.flags, self.width, self.height, self.TocObjId, self.TocObjOffset) - #s += "Objects: \n" + str(self.objects) - -def option_parser(): - from optparse import OptionParser - return OptionParser(usage='%prog file.lrf', version=__appname__+' '+__version__) - -def main(args=sys.argv): - print "lrf2lrs (c) 2006-2007 roxfan, igorsk" - parser = option_parser() - args = parser.parse_args(args)[1] - if len(args)>1: - fname = args[1] - f = file(fname, 'rb') - h = LRFFile(fname) - h.parse(f) - if fname[-4:].lower()==".lrf": - outfname = fname[:-4]+".lrs" - else: - outfname = fname+".lrs" - out = codecs.open(outfname,"w","utf-16") - out.write(h.toXml()) - return 0 - else: - parser.print_help() - return 1 - -if __name__=='__main__': - sys.exit(main()) diff --git a/src/libprs500/ebooks/lrf/lrs/tags.py b/src/libprs500/ebooks/lrf/lrs/tags.py deleted file mode 100644 index cf1e4e637d..0000000000 --- a/src/libprs500/ebooks/lrf/lrs/tags.py +++ /dev/null @@ -1,257 +0,0 @@ -import struct, StringIO - -tagparams = {} -tagnames = {} - -class LRFTagException(Exception): - def __init__(self,msg): - self.msg=msg - def __str__(self): - return repr(self.msg) - -def getByte(f): - return struct.unpack(" count, then count s -def Tag0B_5CParser(f): - cnt = getWord(f) - res = [] - while cnt>0: - res.append(getDWord(f)) - cnt -= 1 - return res - -def DummyTagParser(f): - raise LRFTagException("Uknown dummy tag at %08X" % f.tell()) - -# size, then string of size bytes -def TagStringParser(f): - cnt = getWord(f) - return unicode(f.read(cnt),"utf_16") - -#, then -def Tag78Parser(f): - pos = f.tell() - res = [] - res.append(getDWord(f)) - tag = LRFTag(f) - if tag.tagId != 0xF516: raise LRFTagException("Bad tag 78 at %08X" % pos) - res.append(tag.params) - res.append(getWord(f)) - return res - -def_tag(0x00, 6, "*ObjectStart") -def_tag(0x01, 0, "*ObjectEnd") -def_tag(0x02, 4, "*ObjectInfoLink") -def_tag(0x03, 4, "*Link") -def_tag(0x04, 4, "*StreamSize") -def_tag(0x05, 0, "*StreamStart") -def_tag(0x06, 0, "*StreamEnd") -def_tag(0x07, 4) -def_tag(0x08, 4) -def_tag(0x09, 4) -def_tag(0x0A, 4) -def_tag(0x0B, Tag0B_5CParser, "*ContainedObjectsList") -def_tag(0x0D, 2) -def_tag(0x0E, 2) -def_tag(0x11, 2) -def_tag(0x12, 2) -def_tag(0x13, 2) -def_tag(0x14, 2) -def_tag(0x15, 2) -def_tag(0x16, TagStringParser) -def_tag(0x17, 4) -def_tag(0x18, 4) -def_tag(0x19, 2) -def_tag(0x1A, 2) -def_tag(0x1B, 2) -def_tag(0x1C, 2) -def_tag(0x1D, 2) -def_tag(0x1E, 2) -def_tag(0x21, 2) -def_tag(0x22, 2) -def_tag(0x23, 2) -def_tag(0x24, 2) -def_tag(0x25, 2) -def_tag(0x26, 2) -def_tag(0x27, 2) -def_tag(0x28, 2) -def_tag(0x29, 6) -def_tag(0x2A, 2) -def_tag(0x2B, 2) -def_tag(0x2C, 2) -def_tag(0x2D, 4) -def_tag(0x2E, 2) -def_tag(0x31, 2) -def_tag(0x32, 2) -def_tag(0x33, 2) -def_tag(0x34, 4) -def_tag(0x35, 2) -def_tag(0x36, 2) -def_tag(0x37, 4) -def_tag(0x38, 2) -def_tag(0x39, 2) -def_tag(0x3A, 2) -def_tag(0x3C, 2) -def_tag(0x3D, 2) -def_tag(0x3E, 2) -def_tag(0x41, 2) -def_tag(0x42, 2) -def_tag(0x44, 4) -def_tag(0x45, 4) -def_tag(0x46, 2) -def_tag(0x47, 2) -def_tag(0x48, 2) -def_tag(0x49, 8) -def_tag(0x4A, 8) -def_tag(0x4B, 4) -def_tag(0x4C, 4) -def_tag(0x4D, 0) -def_tag(0x4E, 12) -def_tag(0x51, 2) -def_tag(0x52, 2) -def_tag(0x53, 4) -def_tag(0x54, 2, "*StreamFlags") -def_tag(0x55, TagStringParser) -def_tag(0x56, 2) -def_tag(0x57, 2) -def_tag(0x58, 2) -def_tag(0x59, TagStringParser) -def_tag(0x5A, TagStringParser) -def_tag(0x5B, 4) -def_tag(0x5C, Tag0B_5CParser) -def_tag(0x5D, TagStringParser) -def_tag(0x5E, 2) -def_tag(0x61, 2) -def_tag(0x62, 0) -def_tag(0x63, 0) -def_tag(0x64, 0) -def_tag(0x65, 0) -def_tag(0x66, 0) -def_tag(0x67, 0) -def_tag(0x68, 0) -def_tag(0x69, 0) -def_tag(0x6A, 0) -def_tag(0x6B, 0) -def_tag(0x6C, 8) -def_tag(0x6D, 2) -def_tag(0x6E, 0) -def_tag(0x71, 0) -def_tag(0x72, 0) -def_tag(0x73, 10) -def_tag(0x75, 2) -def_tag(0x76, 2) -def_tag(0x77, 2) -def_tag(0x78, Tag78Parser) -def_tag(0x79, 2) -def_tag(0x7A, 2) -def_tag(0x7B, 4) -def_tag(0x7C, 4, "*ParentPageTree") -def_tag(0x81, 0) -def_tag(0x82, 0) -def_tag(0xA1, 4) -def_tag(0xA2, 0) -def_tag(0xA5, DummyTagParser) -def_tag(0xA6, 0) -def_tag(0xA7, 4) -def_tag(0xA8, 0) -def_tag(0xA9, 0) -def_tag(0xAA, 0) -def_tag(0xAB, 0) -def_tag(0xAC, 0) -def_tag(0xAD, 0) -def_tag(0xAE, 0) -def_tag(0xB1, 0) -def_tag(0xB2, 0) -def_tag(0xB3, 0) -def_tag(0xB4, 0) -def_tag(0xB5, 0) -def_tag(0xB6, 0) -def_tag(0xB7, 0) -def_tag(0xB8, 0) -def_tag(0xB9, 0) -def_tag(0xBA, 0) -def_tag(0xBB, 0) -def_tag(0xBC, 0) -def_tag(0xBD, 0) -def_tag(0xBE, 0) -def_tag(0xC1, 0) -def_tag(0xC2, 0) -def_tag(0xC3, 2) -def_tag(0xC4, 0) -def_tag(0xC5, 2) -def_tag(0xC6, 2) -def_tag(0xC7, 0) -def_tag(0xC8, 2) -def_tag(0xC9, 0) -def_tag(0xCA, 2) -def_tag(0xCB, DummyTagParser) -def_tag(0xCC, 2) -def_tag(0xD1, 12) -def_tag(0xD2, 0) -def_tag(0xD4, 2) -def_tag(0xD6, 0) -def_tag(0xD7, 14) -def_tag(0xD8, 4) -def_tag(0xD9, 8) -def_tag(0xDA, 2) -def_tag(0xDB, 2) -def_tag(0xDC, 2) -def_tag(0xDD, 2) -def_tag(0xF1, 2) -def_tag(0xF2, 4) -def_tag(0xF3, 4) -def_tag(0xF4, 2) -def_tag(0xF5, 4) -def_tag(0xF6, 4) -def_tag(0xF7, 4) -def_tag(0xF8, 4) -def_tag(0xF9, 6) diff --git a/src/libprs500/ebooks/lrf/meta.py b/src/libprs500/ebooks/lrf/meta.py index ab552f102a..f838546419 100644 --- a/src/libprs500/ebooks/lrf/meta.py +++ b/src/libprs500/ebooks/lrf/meta.py @@ -297,7 +297,7 @@ class LRFMetaFile(object): version = field(fmt=WORD, start=0x8) xor_key = field(fmt=WORD, start=0xa) root_object_id = field(fmt=DWORD, start=0xc) - number_of_objets = field(fmt=QWORD, start=0x10) + number_of_objects = field(fmt=QWORD, start=0x10) object_index_offset = field(fmt=QWORD, start=0x18) binding = field(fmt=BYTE, start=0x24) dpi = field(fmt=WORD, start=0x26) @@ -329,7 +329,7 @@ class LRFMetaFile(object): # Format is %Y-%m-%d creation_date = xml_field("CreationDate", parent="DocInfo") producer = xml_field("Producer", parent="DocInfo") - page = xml_field("Page", parent="DocInfo") + page = xml_field("SumPage", parent="DocInfo") def safe(func): """ @@ -547,7 +547,7 @@ class LRFMetaFile(object): elif ttype == 0x12: ext = "png" elif ttype == 0x13: - ext = "bm" + ext = "bmp" return ext def fix_thumbnail_type(self): diff --git a/src/libprs500/ebooks/lrf/objects.py b/src/libprs500/ebooks/lrf/objects.py index a6b552e1ab..daf1a7244a 100644 --- a/src/libprs500/ebooks/lrf/objects.py +++ b/src/libprs500/ebooks/lrf/objects.py @@ -12,7 +12,7 @@ ## You should have received a copy of the GNU General Public License along ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -import struct, array, zlib +import struct, array, zlib, cStringIO from libprs500.ebooks.lrf import LRFParseError from libprs500.ebooks.lrf.tags import Tag @@ -47,10 +47,32 @@ class LRFObject(object): @classmethod def parse_empdots(self, tag, f): self.refEmpDotsFont, self.empDotsFontName, self.empDotsCode = tag.contents - - def __init__(self, stream, id, scramble_key, boundary): - self._scramble_key = scramble_key + @staticmethod + def tag_to_val(h, obj, tag, stream): + if h[1] == 'D': + val = tag.dword + elif h[1] == 'W': + val = tag.word + elif h[1] == 'w': + val = tag.word + if val > 0x8000: + val -= 0x10000 + elif h[1] == 'B': + val = tag.byte + elif h[1] == 'P': + val = tag.contents + elif h[1] != '': + val = getattr(obj, h[1])(tag, stream) + if len(h) > 2: + val = h[2](val) if callable(h[2]) else h[2][val] + return val + + def __init__(self, document, stream, id, scramble_key, boundary): + self._scramble_key = scramble_key + self._document = document + self.id = id + while stream.tell() < boundary: tag = Tag(stream) self.handle_tag(tag, stream) @@ -60,31 +82,68 @@ class LRFObject(object): def handle_tag(self, tag, stream, tag_map=None): if tag_map is None: - tag_map = self.__class__.tag_map + tag_map = self.__class__.tag_map if tag.id in tag_map: h = tag_map[tag.id] - if h[1] == 'D': - val = tag.dword - elif h[1] == 'W': - val = tag.word - elif h[1] == 'w': - val = tag.word - if val > 0x8000: - val -= 0x10000 - elif h[1] == 'B': - val = tag.paramByte() - elif h[1] == 'P': - val = tag.contents - elif h[1] != '': - val = getattr(self, h[1])(tag, stream) - + val = LRFObject.tag_to_val(h, self, tag, stream) if h[1] != '' and h[0] != '': - if len(h) > 2: - val = h[2][val] setattr(self, h[0], val) else: raise LRFParseError("Unknown tag in %s: %s" % (self.__class__.__name__, str(tag))) + def __iter__(self): + for i in range(0): + yield i + + def __unicode__(self): + return unicode(self.__class__.__name__) + + def __str__(self): + return unicode(self) + +class LRFContentObject(LRFObject): + + tag_map = {} + + def __init__(self, bytes, objects): + self.stream = bytes if hasattr(bytes, 'read') else cStringIO.StringIO(bytes) + length = self.stream_size() + self.objects = objects + self._contents = [] + self.current = 0 + self.in_container = True + self.parse_stream(length) + + def parse_stream(self, length): + while self.in_container and self.stream.tell() < length: + tag = Tag(self.stream) + self.handle_tag(tag) + + def stream_size(self): + pos = self.stream.tell() + self.stream.seek(0, 2) + size = self.stream.tell() + self.stream.seek(pos) + return size + + def handle_tag(self, tag): + if tag.id in self.tag_map: + action = self.tag_map[tag.id] + if isinstance(action, basestring): + func, args = action, tuple([]) + else: + func, args = action[0], (action[1],) + getattr(self, func)(tag, *args) + else: + raise LRFParseError("Unknown tag in %s: %s" % (self.__class__.__name__, str(tag))) + + def __iter__(self): + for i in self._contents: + yield i + + + + class LRFStream(LRFObject): tag_map = { 0xF504: ['', 'read_stream_size'], @@ -94,11 +153,11 @@ class LRFStream(LRFObject): } tag_map.update(LRFObject.tag_map) - def __init__(self, stream, id, scramble_key, boundary): + def __init__(self, document, stream, id, scramble_key, boundary): self.stream = '' self.stream_size = 0 self.stream_read = False - LRFObject.__init__(self, stream, id, scramble_key, boundary) + LRFObject.__init__(self, document, stream, id, scramble_key, boundary) def read_stream_size(self, tag, stream): self.stream_size = tag.dword @@ -130,12 +189,26 @@ class LRFStream(LRFObject): class PageTree(LRFObject): tag_map = { - 0xF55C: ['pageList', 'P'], + 0xF55C: ['_contents', 'P'], } tag_map.update(LRFObject.tag_map) + def __iter__(self): + for id in self._contents: + yield self._document.objects[id] -class PageAttr(LRFObject): +class StyleObject(object): + + def __unicode__(self): + s = '<%s objid="%s" stylelabel="%s" '%(self.__class__.__name__.replace('Attr', 'Style'), self.id, self.id) + for h in self.tag_map.values(): + attr = h[0] + if hasattr(self, attr): + s += '%s="%s" '%(attr, getattr(self, attr)) + s += '/>\n' + return s + +class PageAttr(StyleObject, LRFObject): tag_map = { 0xF507: ['oddheaderid', 'D'], 0xF508: ['evenheaderid', 'D'], @@ -159,26 +232,180 @@ class PageAttr(LRFObject): tag_map.update(LRFObject.tag_map) +class Color(object): + def __init__(self, val): + self.b, self.g, self.r, self.a = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF + + def __unicode__(self): + return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b) + + def __str__(self): + return unicode(self) + +class EmptyPageElement(object): + def __iter__(self): + for i in range(0): + yield i + + def __str__(self): + return unicode(self) + +class PageDiv(EmptyPageElement): + + def __init__(self, pain, spacesize, linewidth, linecolor): + self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth + self.linecolor = Color(linecolor) + + def __unicode__(self): + return u'\n\n'%\ + (self.pain, self.spacesize, self.linewidth, self.color) + + +class RuledLine(EmptyPageElement): + + linetype_map = {0x00: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted', 0x13: 'unknown13'} + + def __init__(self, linelength, linetype, linewidth, linecolor): + self.linelength, self.linewidth = linelength, linewidth + self.linetype = self.linetype_map[linetype] + self.linecolor = Color(linecolor) + + def __unicode__(self): + return u'\n\n'%\ + (self.linelength, self.linetype, self.linewidth, self.linecolor) + +class Wait(EmptyPageElement): + + def __init__(self, time): + self.time = time + + def __unicode__(self): + return u'\n\n'%(self.time) + +class Locate(EmptyPageElement): + + pos_map = {1:'bottomleft', 2:'bottomright',3:'topright',4:'topleft', 5:'base'} + + def __init__(self, pos): + self.pos = self.pos_map[pos] + + def __unicode__(self): + return u'\n\n'%(self.pos) + +class BlockSpace(EmptyPageElement): + + def __init__(self, xspace, yspace): + self.xsace, self.yspace = xspace, yspace + + def __unicode__(self): + return u'\n\n'%\ + (self.xspace, self.ysapce) + class Page(LRFStream): tag_map = { - 0xF503: ['pageStyle', 'D'], - 0xF50B: ['contents', 'P'], + 0xF503: ['style_id', 'D'], + 0xF50B: ['obj_list', 'P'], 0xF571: ['', ''], - 0xF57C: ['parentPageTree','D'], + 0xF57C: ['parent_page_tree','D'], } tag_map.update(PageAttr.tag_map) tag_map.update(LRFStream.tag_map) + class Content(LRFContentObject): + tag_map = { + 0xF503: 'link', + 0xF54E: 'page_div', + 0xF547: 'x_space', + 0xF546: 'y_space', + 0xF548: 'pos', + 0xF573: 'ruled_line', + 0xF5D4: 'wait', + 0xF5D6: 'sound_stop', + } + + def __init__(self, bytes, objects): + self.in_blockspace = False + LRFContentObject.__init__(self, bytes, objects) + + def link(self, tag): + self.close_blockspace() + self._contents.append(self.objects[tag.dword]) + + def page_div(self, tag): + self.close_blockspace() + pars = struct.unpack("\n'%(self.style_id, self.id) + for i in self: + s += unicode(i) + s += '\n\n' + return s + + def __str__(self): + return unicode(self) + + -class BlockAttr(LRFObject): +class BlockAttr(StyleObject, LRFObject): tag_map = { 0xF531: ['blockwidth', 'W'], 0xF532: ['blockheight', 'W'], 0xF533: ['blockrule', 'W', {0x14: "horz-fixed", 0x12: "horz-adjustable", 0x41: "vert-fixed", 0x21: "vert-adjustable", 0x44: "block-fixed", 0x22: "block-adjustable"}], - 0xF534: ['bgcolor', 'D'], + 0xF534: ['bgcolor', 'D', Color], 0xF535: ['layout', 'W', {0x41: 'TbRl', 0x34: 'LrTb'}], 0xF536: ['framewidth', 'W'], - 0xF537: ['framecolor', 'D'], + 0xF537: ['framecolor', 'D', Color], 0xF52E: ['framemode', 'W', {0: 'none', 2: 'curve', 1:'square'}], 0xF538: ['topskip', 'W'], 0xF539: ['sidemargin', 'W'], @@ -187,25 +414,7 @@ class BlockAttr(LRFObject): } tag_map.update(LRFObject.tag_map) -class Block(LRFStream): - tag_map = { - 0xF503: ['atrId', 'D'], - } - tag_map.update(BlockAttr.tag_map) - tag_map.update(LRFStream.tag_map) - -class Header(LRFStream): - tag_map = {} - tag_map.update(LRFStream.tag_map) - tag_map.update(BlockAttr.tag_map) - -class Footer(Header): - pass - -class MiniPage(LRFObject): - pass - -class TextAttr(LRFObject): +class TextAttr(StyleObject, LRFObject): tag_map = { 0xF511: ['fontsize', 'w'], 0xF512: ['fontwidth', 'w'], @@ -213,8 +422,8 @@ class TextAttr(LRFObject): 0xF514: ['fontorientation', 'w'], 0xF515: ['fontweight', 'W'], 0xF516: ['fontfacename', 'P'], - 0xF517: ['textcolor', 'D'], - 0xF518: ['textbgcolor', 'D'], + 0xF517: ['textcolor', 'D', Color], + 0xF518: ['textbgcolor', 'D', Color], 0xF519: ['wordspace', 'w'], 0xF51A: ['letterspace', 'w'], 0xF51B: ['baselineskip', 'w'], @@ -226,45 +435,384 @@ class TextAttr(LRFObject): 0xF53E: ['columnsep', 'W'], 0xF5DD: ['charspace', 'w'], 0xF5F1: ['textlinewidth', 'W'], - 0xF5F2: ['linecolor', 'D'], + 0xF5F2: ['linecolor', 'D', Color], } tag_map.update(LRFObject.tag_map) + +class Block(LRFStream): + tag_map = { + 0xF503: ['style_id', 'D'], + } + tag_map.update(BlockAttr.tag_map) + tag_map.update(TextAttr.tag_map) + tag_map.update(LRFStream.tag_map) + extra_attrs = [i[0] for i in BlockAttr.tag_map.values()] + extra_attrs.extend([i[0] for i in TextAttr.tag_map.values()]) + + @apply + def style(): + def fget(self): + return self._document.objects[self.style_id] + return property(fget=fget) + + @apply + def textstyle(): + def fget(self): + return self._document.objects[self.textstyle_id] + return property(fget=fget) + + def initialize(self): + self.attrs = {} + stream = cStringIO.StringIO(self.stream) + tag = Tag(stream) + if tag.id != 0xF503: + raise LRFParseError("Bad block content") + obj = self._document.objects[tag.dword] + if isinstance(obj, SimpleText): + self.name = 'SimpleTextBlock' + self.textstyle_id = obj.style_id + elif isinstance(obj, Text): + self.name = 'TextBlock' + self.textstyle_id = obj.style_id + elif isinstance(obj, Image): + self.name = 'ImageBlock' + for attr in ('x0', 'x1', 'y0', 'y1', 'xsize', 'ysize', 'refstream'): + self.attrs[attr] = getattr(obj, attr) + elif isinstance(obj, Button): + self.name = 'ButtonBlock' + else: + raise LRFParseError("Unexpected block type: "+obj.__class__.__name__) + + self.content = obj + + + for attr in self.extra_attrs: + if hasattr(self, attr): + self.attrs[attr] = getattr(self, attr) + + def __iter__(self): + try: + for i in iter(self.content): + yield i + except TypeError: + yield self.content + + def __unicode__(self): + s = u'\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id) + if hasattr(self, 'textstyle_id'): + s += 'textstyle="%d" '%(self.textstyle_id,) + for attr in self.attrs: + s += '%s="%s" '%(attr, self.attrs[attr]) + s = s.rstrip()+'>\n' + if self.name != 'ImageBlock': + for i in self: + s += unicode(i) + s += '\n'%(self.name,) + return s + + +class MiniPage(LRFStream): + tag_map = { + 0xF541: ['minipagewidth', 'W'], + 0xF542: ['minipageheight', 'W'], + } + tag_map.update(LRFStream.tag_map) + tag_map.update(BlockAttr.tag_map) + class Text(LRFStream): tag_map = { - 0xF503: ['atrId', 'D'], + 0xF503: ['style_id', 'D'], } tag_map.update(TextAttr.tag_map) tag_map.update(LRFStream.tag_map) + + @apply + def style(): + def fget(self): + return self._document.objects[self.style_id] + return property(fget=fget) + + class Content(LRFContentObject): + tag_map = { + 0xF581: ['simple_container', 'Italic'], + 0xF582: 'end_container', + 0xF5B1: ['simple_container', 'Yoko'], + 0xF5B2: 'end_container', + 0xF5B3: ['simple_container', 'Tate'], + 0xF5B4: 'end_container', + 0xF5B5: ['simple_container', 'Nekase'], + 0xF5B6: 'end_container', + 0xF5A1: 'start_para', + 0xF5A2: 'end_para', + 0xF5A7: 'char_button', + 0xF5A8: 'end_container', + 0xF5A9: ['simple_container', 'Rubi'], + 0xF5AA: 'end_container', + 0xF5AB: ['simple_container', 'Oyamoji'], + 0xF5AC: 'end_container', + 0xF5AD: ['simple_container', 'Rubimoji'], + 0xF5AE: 'end_container', + 0xF5B7: ['simple_container', 'Sup'], + 0xF5B8: 'end_container', + 0xF5B9: ['simple_container', 'Sub'], + 0xF5BA: 'end_container', + 0xF5BB: ['simple_container', 'NoBR'], + 0xF5BC: 'end_container', + 0xF5BD: ['simple_container', 'EmpDots'], + 0xF5BE: 'end_container', + 0xF5C1: ['simple_container', 'EmpLine'], + 0xF5C2: 'end_container', + 0xF5C3: 'draw_char', + 0xF5C4: 'end_container', + 0xF5C6: 'box', + 0xF5C7: 'end_container', + 0xF5CA: 'space', + 0xF5CC: 'string', + 0xF5D1: 'plot', + 0xF5D2: 'cr', + } + + text_map = { 0x22: u'"', 0x26: u'&', 0x27: u''', 0x3c: u'<', 0x3e: u'>' } + linetype_map = {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted'} + adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'} + + def __init__(self, bytes, objects, parent=None, name=None, attrs={}): + self.parent = parent + self.name = name + self.attrs = attrs + LRFContentObject.__init__(self, bytes, objects) + + def parse_stream(self, length): + offset = self.stream.tell() + while self.in_container and offset < length: + buf = self.stream.getvalue()[offset:] + pos = buf.find('\xf5') - 1 + if pos > 0: + self.stream.seek(offset+pos) + self.add_text(buf[:pos]) + self.handle_tag(Tag(self.stream)) + offset = self.stream.tell() + + def handle_tag(self, tag): + if tag.id in self.tag_map: + action = self.tag_map[tag.id] + if isinstance(action, basestring): + func, args = action, tuple([]) + else: + func, args = action[0], (action[1],) + getattr(self, func)(tag, *args) + elif tag.id in TextAttr.tag_map: + h = TextAttr.tag_map[tag.id] + val = LRFObject.tag_to_val(h, None, tag, self.stream) + if self.name == 'Span': + if h[0] not in self.attrs: + self.attrs[h[0]] = val + elif val != self.attrs[h[0]]: + if self._contents: self.parent._contents.append(self) + Text.Content(self.stream, self.objects, self.parent, + 'Span', {h[0]: val}) + + + else: + Text.Content(self.stream, self.objects, self, + 'Span', {h[0]: val}) + + else: + raise LRFParseError('Unknown tag in text stream %s'&(tag,)) + + + def simple_container(self, tag, name): + cont = Text.Content(self.stream, self.objects, parent=self, name=name) + self._contents.append(cont) + + def end_container(self, *args): + self.in_container = False + if self.name == 'Span' and self._contents: + self.parent._contents.append(self) + + def end_to_root(self): + parent = self + while parent: + parent.end_container() + parent = parent.parent + + def root(self): + root = self + while root.parent: + root = root.parent + return root + + def start_para(self, tag): + self.end_to_root() + root = self.root() + root.in_container = True + + p = Text.Content(self.stream, self.objects, parent=root, name='P') + root._contents.append(p) + + def end_para(self, tag): + self.end_to_root() + root = self.root() + root.in_container = True + + def cr(self, tag): + self._contents.append(Text.Content('', self.objects, parent=self, name='CR')) + + def char_button(self, tag): + self._contents.append(Text.Content(self.stream, self.objects, parent=self, + name='CharButton', attrs={'refobj':tag.dword})) + + def space(self, tag): + self._contents.append(Text.Content('', self.objects, parent=self, + name='Space', attrs={'xsize':tag.sword})) + + def string(self, tag): + strlen = tag.word + self.add_text(self.stream.read(strlen)) + + def add_text(self, text): + s = unicode(text, "utf-16-le") + self._contents.append(s.translate(self.text_map)) + + def plot(self, tag): + xsize, ysize, refobj, adjustment = struct.unpack("' + s = u'' + if self.name is not None: + s += u'<'+self.name+u' ' + for attr in self.attrs: + s += u'%s="%s" '%(attr, self.attrs[attr]) + s = s.rstrip() + u'>' + for i in self: + s += unicode(i) + if self.name is not None: + s += u''%(self.name,) + if self.name in ['P', "CR"]: + s += '\n' + return s + + def __str__(self): + return unicode(self) + + def initialize(self): + self.content = Text.Content(self.stream, self._document.objects) + + def __iter__(self): + for i in self.content: + yield i + + def __str__(self): + return unicode(self.content) class Image(LRFObject): tag_map = { 0xF54A: ['', 'parse_image_rect'], 0xF54B: ['', 'parse_image_size'], - 0xF54C: ['ref_object_id', 'D'], #imagestream or import + 0xF54C: ['refstream', 'D'], 0xF555: ['comment', 'P'], } def parse_image_rect(self, tag, f): - self.image_rect = struct.unpack("\n'%\ + (self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream) + +class PutObj(EmptyPageElement): + + def __init__(self, x, y, refobj): + self.x, self.y, self.refobj = x, y, refobj + + def __unicode__(self): + return u''%(self.x, self.y, self.refobj) class Canvas(LRFStream): tag_map = { 0xF551: ['canvaswidth', 'W'], 0xF552: ['canvasheight', 'W'], 0xF5DA: ['', 'parse_waits'], + 0xF533: ['blockrule', 'W', {0x44: "block-fixed", 0x22: "block-adjustable"}], + 0xF534: ['bgcolor', 'D', Color], + 0xF535: ['layout', 'W', {0x41: 'TbRl', 0x34: 'LrTb'}], + 0xF536: ['framewidth', 'W'], + 0xF537: ['framecolor', 'D', Color], + 0xF52E: ['framemode', 'W', {0: 'none', 2: 'curve', 1:'square'}], } - tag_map.update(BlockAttr.tag_map) tag_map.update(LRFStream.tag_map) + extra_attrs = ['canvaswidth', 'canvasheight', 'blockrule', 'layout', + 'framewidth', 'framecolor', 'framemode'] def parse_waits(self, tag, f): val = tag.word self.setwaitprop = val&0xF self.setwaitsync = val&0xF0 + + def initialize(self): + self.attrs = {} + for attr in self.extra_attrs: + if hasattr(self, attr): + self.attrs[attr] = getattr(self, attr) + self._contents = [] + stream = cStringIO.StringIO(self.stream) + while stream.tell() < len(self.stream): + tag = Tag(stream) + self._contents.append(PutObj(*struct.unpack("\n' + for po in self: + s += unicode(po) + '\n' + s += '\n'%(self.__class__.__name__,) + return s + + def __iter__(self): + for i in self._contents: + yield i + +class Header(Canvas): + pass + +class Footer(Canvas): + pass + class ESound(LRFObject): pass @@ -273,9 +821,26 @@ class ImageStream(LRFStream): tag_map = { 0xF555: ['comment', 'P'], } + imgext = {0x11: 'jpeg', 0x12: 'png', 0x13: 'bmp', 0x14: 'gif'} + tag_map.update(LRFStream.tag_map) + + @apply + def encoding(): + def fget(self): + return self.imgext[self.stream_flags & 0xFF].upper() + return property(fget=fget) + + def end_stream(self, *args): + LRFStream.end_stream(self, *args) + self.file = str(self.id) + '.' + self.encoding.lower() + self._document.image_map[self.id] = self + + def __unicode__(self): + return u'\n'%\ + (self.id, self.encoding, self.file) -class Import(LRFObject): +class Import(LRFStream): pass class Button(LRFObject): @@ -300,12 +865,12 @@ class Button(LRFObject): } tag_map.update(LRFObject.tag_map) - def __init__(self, stream, id, scramble_key, boundary): + def __init__(self, document, stream, id, scramble_key, boundary): self.xml = u'' self.refimage = {} self.actions = {} self.to_dump = True - LRFObject.__init__(self, stream, id, scramble_key, boundary) + LRFObject.__init__(self, document, stream, id, scramble_key, boundary) def do_ref_image(self, tag, f): self.refimage[self.button_yype] = tag.dword @@ -341,6 +906,38 @@ class Button(LRFObject): def parse_run(self, tag, f): self.actions[self.button_type].append((5, struct.unpack("\n'%(self.id,) + if self.button_flags & 0x10 != 0: + s += '\n' + s += '\n'% self.jump_action(2) + s += '\n' + else: + raise LRFParseError('Unsupported button type') + s += '\n' + return s + + @apply + def refpage(): + def fget(self): + return self.jump_action(2)[0] + return property(fget=fget) + + @apply + def refobject(): + def fget(self): + return self.jump_action(2)[1] + return property(fget=fget) + class Window(LRFObject): pass @@ -356,35 +953,87 @@ class SoundStream(LRFObject): class Font(LRFStream): tag_map = { - 0xF559: ['fontFilename', 'P'], - 0xF55D: ['fontFacename', 'P'], + 0xF559: ['fontfilename', 'P'], + 0xF55D: ['fontfacename', 'P'], } tag_map.update(LRFStream.tag_map) + + def end_stream(self, *args): + LRFStream.end_stream(self, *args) + self._document.font_map[self.fontfacename] = self + self.file = self.fontfacename + '.ttf' + + def __unicode__(self): + s = '\n'%\ + (self.id, self.fontfilename, self.fontfacename, self.file) + return s class ObjectInfo(LRFObject): pass -class BookAttr(LRFObject): +class BookAttr(StyleObject, LRFObject): tag_map = { - 0xF57B: ['pageTreeId', 'D'], + 0xF57B: ['page_tree_id', 'D'], 0xF5D8: ['', 'add_font'], 0xF5DA: ['setwaitprop', 'W', {1: 'replay', 2: 'noreplay'}], } tag_map.update(LRFObject.tag_map) + binding_map = {1: 'Lr', 16 : 'Rl'} - def __init__(self, stream, id, scramble_key, boundary): + def __init__(self, document, stream, id, scramble_key, boundary): self.font_link_list = [] - LRFObject.__init__(self, stream, id, scramble_key, boundary) + LRFObject.__init__(self, document, stream, id, scramble_key, boundary) def add_font(self, tag, f): self.font_link_list.append(tag.dword) + + def __unicode__(self): + s = u'\n'%(self.id, self.id) + doc = self._document + s += u'\n'%\ + (self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth) + for font in self._document.font_map.values(): + s += unicode(font) + s += '\n' + return s -class SimpleText(LRFObject): +class SimpleText(Text): pass +class TocLabel(object): + + def __init__(self, refpage, refobject, label): + self.refpage, self.refobject, self.label = refpage, refobject, label + + def __unicode__(self): + return u'%s\n'%(self.refpage, self.refobject, self.label) + class TOCObject(LRFStream): - pass + + def initialize(self): + stream = cStringIO.StringIO(self.stream) + c = struct.unpack(" 0: + refpage = struct.unpack("\n' + for i in self: + s += unicode(i) + return s + '\n' + object_map = [ None, #00 @@ -421,14 +1070,14 @@ object_map = [ ] -def get_object(stream, id, offset, size, scramble_key): +def get_object(document, stream, id, offset, size, scramble_key): stream.seek(offset) start_tag = Tag(stream) if start_tag.id != 0xF500: raise LRFParseError('Bad object start') obj_id, obj_type = struct.unpack("\n\n\n' + bookinfo += u'%s\n'%(self.title_reading, self.title) + bookinfo += u'%s\n'%(self.author_reading, self.author) + bookinfo += u'%s\n'%(self.book_id,) + bookinfo += u'%s\n'%(self.publisher,) + bookinfo += u'\n'%(self.label,) + bookinfo += u'%s\n'%(self.category,) + bookinfo += u'%s\n'%(self.classification,) + bookinfo += u'%s\n\n\n'%(self.free_text,) + th = self.thumbnail + if th: + bookinfo += u'\n'%(self.title+'_thumbnail.'+self.thumbail_extension(),) + open(self.title+'_thumbnail.'+self.thumbail_extension(), 'wb').write(th) + bookinfo += u'%s\n'%(self.language,) + bookinfo += u'%s\n'%(self.creator,) + bookinfo += u'%s\n'%(self.producer,) + bookinfo += u'%s\n\n\n\n'%(self.page,) + pages = u'' + done_main = False + pt_id = -1 + for page_tree in self: + if not done_main: + done_main = True + pages += u'
\n' + close = u'
\n' + pt_id = page_tree.id + else: + pages += u'\n'%(page_tree.id,) + close = u'\n' + for page in page_tree: + pages += unicode(page) + pages += close + traversed_objects = [int(i) for i in re.findall(r'objid="(\w+)"', pages)] + [pt_id] + + objects = u'\n\n' + styles = u'\n\n' + objects += '\n' + self.write_files() + return '\n' + bookinfo + pages + styles + objects + '' - def get_byte(self): - return struct.unpack("\n') + logger.info('Parsing LRF...') + d = LRFDocument(open(args[1], 'rb')) + logger.info('Creating XML...') + o.write(d.to_xml()) + logger.info('LRS written to '+opts.out) return 0 if __name__ == '__main__': diff --git a/src/libprs500/ebooks/lrf/tags.py b/src/libprs500/ebooks/lrf/tags.py index cc1a155b2d..61da0bf646 100644 --- a/src/libprs500/ebooks/lrf/tags.py +++ b/src/libprs500/ebooks/lrf/tags.py @@ -199,7 +199,7 @@ class Tag(object): self.offset = stream.tell() tag_id = struct.unpack("