## Copyright (C) 2007 Kovid Goyal kovid@kovidgoyal.net ## This program is free software; you can redistribute it and/or modify ## it under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 2 of the License, or ## (at your option) any later version. ## ## This program is distributed in the hope that it will be useful, ## but WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ## GNU General Public License for more details. ## ## You should have received a copy of the GNU General Public License along ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. import struct, array, zlib, cStringIO, collections, re from htmlentitydefs import name2codepoint from libprs500.ebooks.lrf import LRFParseError from libprs500.ebooks.lrf.tags import Tag ruby_tags = { 0xF575: ['rubyAlignAndAdjust', 'W'], 0xF576: ['rubyoverhang', 'W', {0: 'none', 1:'auto'}], 0xF577: ['empdotsposition', 'W', {1: 'before', 2:'after'}], 0xF578: ['','parse_empdots'], 0xF579: ['emplineposition', 'W', {1: 'before', 2:'after'}], 0xF57A: ['emplinetype', 'W', {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted'}] } class LRFObject(object): tag_map = { 0xF500: ['', ''], 0xF502: ['infoLink', 'D'], 0xF501: ['', ''], } @classmethod def descramble_buffer(cls, buf, l, xorKey): i = 0 a = array.array('B',buf) while l>0: a[i] ^= xorKey i+=1 l-=1 return a.tostring() @classmethod def parse_empdots(self, tag, f): self.refEmpDotsFont, self.empDotsFontName, self.empDotsCode = tag.contents @staticmethod def tag_to_val(h, obj, tag, stream): if h[1] == 'D': val = tag.dword elif h[1] == 'W': val = tag.word elif h[1] == 'w': val = tag.word if val > 0x8000: val -= 0x10000 elif h[1] == 'B': val = tag.byte elif h[1] == 'P': val = tag.contents elif h[1] != '': val = getattr(obj, h[1])(tag, stream) if len(h) > 2: val = h[2](val) if callable(h[2]) else h[2][val] return val def __init__(self, document, stream, id, scramble_key, boundary): self._scramble_key = scramble_key self._document = document self.id = id while stream.tell() < boundary: tag = Tag(stream) self.handle_tag(tag, stream) def parse_bg_image(self, tag, f): self.bg_image_mode, self.bg_image_id = struct.unpack(" 0x400 and (isinstance(self, ImageStream) or isinstance(self, Font) or isinstance(self, SoundStream)): l = 0x400; self.stream = self.descramble_buffer(self.stream, l, key) if self.stream_flags & 0x100 !=0: decomp_size = struct.unpack("\n' return s def as_dict(self): d = {} for h in self.tag_map.values(): attr = h[0] if hasattr(self, attr): d[attr] = getattr(self, attr) return d class PageAttr(StyleObject, LRFObject): tag_map = { 0xF507: ['oddheaderid', 'D'], 0xF508: ['evenheaderid', 'D'], 0xF509: ['oddfooterid', 'D'], 0xF50A: ['evenfooterid', 'D'], 0xF521: ['topmargin', 'W'], 0xF522: ['headheight', 'W'], 0xF523: ['headsep', 'W'], 0xF524: ['oddsidemargin', 'W'], 0xF52C: ['evensidemargin', 'W'], 0xF525: ['textheight', 'W'], 0xF526: ['textwidth', 'W'], 0xF527: ['footspace', 'W'], 0xF528: ['footheight', 'W'], 0xF535: ['layout', 'W', {0x41: 'TbRl', 0x34: 'LrTb'}], 0xF52B: ['pageposition', 'W', {0: 'any', 1:'upper', 2: 'lower'}], 0xF52A: ['setemptyview', 'W', {1: 'show', 0: 'empty'}], 0xF5DA: ['setwaitprop', 'W', {1: 'replay', 2: 'noreplay'}], 0xF529: ['', "parse_bg_image"], } tag_map.update(LRFObject.tag_map) class Color(object): def __init__(self, val): self.a, self.r, self.g, self.b = val & 0xFF, (val>>8)&0xFF, (val>>16)&0xFF, (val>>24)&0xFF def __unicode__(self): return u'0x%02x%02x%02x%02x'%(self.a, self.r, self.g, self.b) def __str__(self): return unicode(self) def __len__(self): return 4 def __getitem__(self, i): # Qt compatible ordering and values return (self.r, self.g, self.b, 0xff-self.a)[i] # In Qt 0xff is opaque while in LRS 0x00 is opaque class EmptyPageElement(object): def __iter__(self): for i in range(0): yield i def __str__(self): return unicode(self) class PageDiv(EmptyPageElement): def __init__(self, pain, spacesize, linewidth, linecolor): self.pain, self.spacesize, self.linewidth = pain, spacesize, linewidth self.linecolor = Color(linecolor) def __unicode__(self): return u'\n\n'%\ (self.pain, self.spacesize, self.linewidth, self.color) class RuledLine(EmptyPageElement): linetype_map = {0x00: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted', 0x13: 'unknown13'} def __init__(self, linelength, linetype, linewidth, linecolor): self.linelength, self.linewidth = linelength, linewidth self.linetype = self.linetype_map[linetype] self.linecolor = Color(linecolor) self.id = -1 def __unicode__(self): return u'\n\n'%\ (self.linelength, self.linetype, self.linewidth, self.linecolor) class Wait(EmptyPageElement): def __init__(self, time): self.time = time def __unicode__(self): return u'\n\n'%(self.time) class Locate(EmptyPageElement): pos_map = {1:'bottomleft', 2:'bottomright',3:'topright',4:'topleft', 5:'base'} def __init__(self, pos): print pos self.pos = self.pos_map[pos] def __unicode__(self): return u'\n\n'%(self.pos) class BlockSpace(EmptyPageElement): def __init__(self, xspace, yspace): self.xspace, self.yspace = xspace, yspace def __unicode__(self): return u'\n\n'%\ (self.xspace, self.yspace) class Page(LRFStream): tag_map = { 0xF503: ['style_id', 'D'], 0xF50B: ['obj_list', 'P'], 0xF571: ['', ''], 0xF57C: ['parent_page_tree','D'], } tag_map.update(PageAttr.tag_map) tag_map.update(LRFStream.tag_map) style = property(fget=lambda self : self._document.objects[self.style_id]) evenheader = property(fget=lambda self : self._document.objects[self.style.evenheaderid]) evenfooter = property(fget=lambda self : self._document.objects[self.style.evenfooterid]) oddheader = property(fget=lambda self : self._document.objects[self.style.oddheaderid]) oddfooter = property(fget=lambda self : self._document.objects[self.style.oddfooterid]) class Content(LRFContentObject): tag_map = { 0xF503: 'link', 0xF54E: 'page_div', 0xF547: 'x_space', 0xF546: 'y_space', 0xF548: 'do_pos', 0xF573: 'ruled_line', 0xF5D4: 'wait', 0xF5D6: 'sound_stop', } def __init__(self, bytes, objects): self.in_blockspace = False LRFContentObject.__init__(self, bytes, objects) def link(self, tag): self.close_blockspace() self._contents.append(self.objects[tag.dword]) def page_div(self, tag): self.close_blockspace() pars = struct.unpack("\n'%(self.style_id, self.id) for i in self: s += unicode(i) s += '\n\n' return s def __str__(self): return unicode(self) class BlockAttr(StyleObject, LRFObject): tag_map = { 0xF531: ['blockwidth', 'W'], 0xF532: ['blockheight', 'W'], 0xF533: ['blockrule', 'W', {0x14: "horz-fixed", 0x12: "horz-adjustable", 0x41: "vert-fixed", 0x21: "vert-adjustable", 0x44: "block-fixed", 0x22: "block-adjustable"}], 0xF534: ['bgcolor', 'D', Color], 0xF535: ['layout', 'W', {0x41: 'TbRl', 0x34: 'LrTb'}], 0xF536: ['framewidth', 'W'], 0xF537: ['framecolor', 'D', Color], 0xF52E: ['framemode', 'W', {0: 'none', 2: 'curve', 1:'square'}], 0xF538: ['topskip', 'W'], 0xF539: ['sidemargin', 'W'], 0xF53A: ['footskip', 'W'], 0xF529: ['', 'parse_bg_image'], } tag_map.update(LRFObject.tag_map) class TextAttr(StyleObject, LRFObject): tag_map = { 0xF511: ['fontsize', 'w'], 0xF512: ['fontwidth', 'w'], 0xF513: ['fontescapement', 'w'], 0xF514: ['fontorientation', 'w'], 0xF515: ['fontweight', 'W'], 0xF516: ['fontfacename', 'P'], 0xF517: ['textcolor', 'D', Color], 0xF518: ['textbgcolor', 'D', Color], 0xF519: ['wordspace', 'w'], 0xF51A: ['letterspace', 'w'], 0xF51B: ['baselineskip', 'w'], 0xF51C: ['linespace', 'w'], 0xF51D: ['parindent', 'w'], 0xF51E: ['parskip', 'w'], 0xF53C: ['align', 'W', {1: 'head', 4: 'center', 8: 'foot'}], 0xF53D: ['column', 'W'], 0xF53E: ['columnsep', 'W'], 0xF5DD: ['charspace', 'w'], 0xF5F1: ['textlinewidth', 'W'], 0xF5F2: ['linecolor', 'D', Color], } tag_map.update(ruby_tags) tag_map.update(LRFObject.tag_map) class Block(LRFStream): tag_map = { 0xF503: ['style_id', 'D'], } tag_map.update(BlockAttr.tag_map) tag_map.update(TextAttr.tag_map) tag_map.update(LRFStream.tag_map) extra_attrs = [i[0] for i in BlockAttr.tag_map.values()] extra_attrs.extend([i[0] for i in TextAttr.tag_map.values()]) style = property(fget=lambda self : self._document.objects[self.style_id]) textstyle = property(fget=lambda self : self._document.objects[self.textstyle_id]) def initialize(self): self.attrs = {} stream = cStringIO.StringIO(self.stream) tag = Tag(stream) if tag.id != 0xF503: raise LRFParseError("Bad block content") obj = self._document.objects[tag.dword] if isinstance(obj, SimpleText): self.name = 'SimpleTextBlock' self.textstyle_id = obj.style_id elif isinstance(obj, Text): self.name = 'TextBlock' self.textstyle_id = obj.style_id elif isinstance(obj, Image): self.name = 'ImageBlock' for attr in ('x0', 'x1', 'y0', 'y1', 'xsize', 'ysize', 'refstream'): self.attrs[attr] = getattr(obj, attr) self.refstream = self._document.objects[self.attrs['refstream']] elif isinstance(obj, Button): self.name = 'ButtonBlock' else: raise LRFParseError("Unexpected block type: "+obj.__class__.__name__) self.content = obj for attr in self.extra_attrs: if hasattr(self, attr): self.attrs[attr] = getattr(self, attr) def __unicode__(self): s = u'\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id) if hasattr(self, 'textstyle_id'): s += 'textstyle="%d" '%(self.textstyle_id,) for attr in self.attrs: s += '%s="%s" '%(attr, self.attrs[attr]) s = s.rstrip()+'>\n' s += unicode(self.content) s += '\n'%(self.name,) return s class MiniPage(LRFStream): tag_map = { 0xF541: ['minipagewidth', 'W'], 0xF542: ['minipageheight', 'W'], } tag_map.update(LRFStream.tag_map) tag_map.update(BlockAttr.tag_map) class Text(LRFStream): tag_map = { 0xF503: ['style_id', 'D'], } tag_map.update(TextAttr.tag_map) tag_map.update(LRFStream.tag_map) style = property(fget=lambda self : self._document.objects[self.style_id]) text_map = { 0x22: u'"', 0x26: u'&', 0x27: u'&squot;', 0x3c: u'<', 0x3e: u'>' } entity_pattern = re.compile(r'&(\S+?);') text_tags = { 0xF581: ['simple_container', 'Italic'], 0xF582: 'end_container', 0xF5B1: ['simple_container', 'Yoko'], 0xF5B2: 'end_container', 0xF5B3: ['simple_container', 'Tate'], 0xF5B4: 'end_container', 0xF5B5: ['simple_container', 'Nekase'], 0xF5B6: 'end_container', 0xF5A1: 'start_para', 0xF5A2: 'end_para', 0xF5A7: 'char_button', 0xF5A8: 'end_container', 0xF5A9: ['simple_container', 'Rubi'], 0xF5AA: 'end_container', 0xF5AB: ['simple_container', 'Oyamoji'], 0xF5AC: 'end_container', 0xF5AD: ['simple_container', 'Rubimoji'], 0xF5AE: 'end_container', 0xF5B7: ['simple_container', 'Sup'], 0xF5B8: 'end_container', 0xF5B9: ['simple_container', 'Sub'], 0xF5BA: 'end_container', 0xF5BB: ['simple_container', 'NoBR'], 0xF5BC: 'end_container', 0xF5BD: ['simple_container', 'EmpDots'], 0xF5BE: 'end_container', 0xF5C1: 'empline', 0xF5C2: 'end_container', 0xF5C3: 'draw_char', 0xF5C4: 'end_container', 0xF5C6: 'box', 0xF5C7: 'end_container', 0xF5CA: 'space', 0xF5D1: 'plot', 0xF5D2: 'cr', } class TextTag(object): def __init__(self, name, attrs={}, self_closing=False): self.name = name self.attrs = attrs self.self_closing = self_closing def __unicode__(self): s = u'<%s '%(self.name,) for name, val in self.attrs.items(): s += '%s="%s" '%(name, val) return s.rstrip() + (u' />' if self.self_closing else u'>') + (u'\n' if self.name in ('P', 'CR') else u'') class Span(TextTag): pass linetype_map = {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted'} adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'} lineposition_map = {1:'before', 2:'after'} def handle_entity(self, match): ent = match.group(1) if ent.startswith(u'#x'): return unichr(int(ent[2:], 16)) if ent.startswith(u'#'): return unichr(int(ent[1:])) return unichr(name2codepoint[ent]) def add_text(self, text): s = unicode(text, "utf-16-le") if s: s = s.translate(self.text_map) self.content.append(self.entity_pattern.sub(self.handle_entity, s)) def end_container(self, tag, stream): self.content.append(None) def start_para(self, tag, stream): self.content.append(self.__class__.TextTag('P')) def close_containers(self, start=0): open_containers = 0 if len(self.content) > 0 and isinstance(self.content[-1], self.__class__.Span): self.content.pop() while start < len(self.content): c = self.content[start] if c is None: open_containers -= 1 elif isinstance(c, self.__class__.TextTag) and not c.self_closing: open_containers += 1 start += 1 self.content.extend(None for i in range(open_containers)) def end_para(self, tag, stream): i = len(self.content)-1 while i > -1: if isinstance(self.content[i], Text.TextTag) and self.content[i].name == 'P': break i -= 1 self.close_containers(start=i) def cr(self, tag, stream): self.content.append(self.__class__.TextTag('CR', self_closing=True)) def char_button(self, tag, stream): self.content.append(self.__class__.TextTag( 'CharButton', attrs={'refobj':tag.dword})) def simple_container(self, tag, name): self.content.append(self.__class__.TextTag(name)) def empline(self, tag, stream): def invalid(op): stream.seek(op) self.simple_container(None, 'EmpLine') oldpos = stream.tell() try: t = Tag(stream) if t.id not in [0xF579, 0xF57A]: raise LRFParseError except LRFParseError: invalid(oldpos) return h = TextAttr.tag_map[t.id] attrs = {} attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None) oldpos = stream.tell() try: t = Tag(stream) if t.id not in [0xF579, 0xF57A]: raise LRFParseError h = TextAttr.tag_map[t.id] attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None) except LRFParseError: stream.seek(oldpos) self.content.append(self.__class__.TextTag( 'EmpLine', attrs=attrs)) def space(self, tag, stream): self.content.append(self.__class__.TextTag('Space', attrs={'xsize':tag.sword}, self_closing=True)) def plot(self, tag, stream): xsize, ysize, refobj, adjustment = struct.unpack(" 0: self.add_text(self.stream[stream.tell():pos]) stream.seek(pos) tag = Tag(stream) if tag.id == 0xF5CC: self.add_text(stream.read(tag.word)) elif tag.id in self.__class__.text_tags: # A Text tag action = self.__class__.text_tags[tag.id] if isinstance(action, basestring): getattr(self, action)(tag, stream) else: getattr(self, action[0])(tag, action[1]) elif tag.id in TextAttr.tag_map: # A Span attribute action = TextAttr.tag_map[tag.id] if len(self.content) == 0: current_style = style.copy() name, val = action[0], LRFObject.tag_to_val(action, None, tag, None) if current_style[name] != val: # No existing Span if len(self.content) > 0 and isinstance(self.content[-1], self.__class__.Span): self.content[-1].attrs[name] = val else: self.content.append(self.__class__.Span('Span', {name:val})) current_style[name] = val if len(self.content) > 0: self.close_containers() self.stream = None def __unicode__(self): s = u'' open_containers = collections.deque() for c in self.content: if isinstance(c, basestring): s += c elif c is None: p = open_containers.pop() nl = u'\n' if p.name == 'P' else u'' s += nl + u''%(p.name,) + nl else: s += unicode(c) if not c.self_closing: open_containers.append(c) if len(open_containers) > 0: raise LRFParseError('Malformed text stream %s'%([i.name for i in open_containers if isinstance(i, Text.TextTag)],)) return s class Image(LRFObject): tag_map = { 0xF54A: ['', 'parse_image_rect'], 0xF54B: ['', 'parse_image_size'], 0xF54C: ['refstream', 'D'], 0xF555: ['comment', 'P'], } def parse_image_rect(self, tag, f): self.x0, self.y0, self.x1, self.y1 = struct.unpack("\n'%\ (self.id, self.x0, self.y0, self.x1, self.y1, self.xsize, self.ysize, self.refstream) class PutObj(EmptyPageElement): def __init__(self, objects, x, y, refobj): self.x, self.y, self.refobj = x, y, refobj self.object = objects[refobj] def __unicode__(self): return u''%(self.x, self.y, self.refobj) class Canvas(LRFStream): tag_map = { 0xF551: ['canvaswidth', 'W'], 0xF552: ['canvasheight', 'W'], 0xF5DA: ['', 'parse_waits'], 0xF533: ['blockrule', 'W', {0x44: "block-fixed", 0x22: "block-adjustable"}], 0xF534: ['bgcolor', 'D', Color], 0xF535: ['layout', 'W', {0x41: 'TbRl', 0x34: 'LrTb'}], 0xF536: ['framewidth', 'W'], 0xF537: ['framecolor', 'D', Color], 0xF52E: ['framemode', 'W', {0: 'none', 2: 'curve', 1:'square'}], } tag_map.update(LRFStream.tag_map) extra_attrs = ['canvaswidth', 'canvasheight', 'blockrule', 'layout', 'framewidth', 'framecolor', 'framemode'] def parse_waits(self, tag, f): val = tag.word self.setwaitprop = val&0xF self.setwaitsync = val&0xF0 def initialize(self): self.attrs = {} for attr in self.extra_attrs: if hasattr(self, attr): self.attrs[attr] = getattr(self, attr) self._contents = [] stream = cStringIO.StringIO(self.stream) while stream.tell() < len(self.stream): tag = Tag(stream) self._contents.append(PutObj(self._document.objects, *struct.unpack("\n' for po in self: s += unicode(po) + '\n' s += '\n'%(self.__class__.__name__,) return s def __iter__(self): for i in self._contents: yield i class Header(Canvas): pass class Footer(Canvas): pass class ESound(LRFObject): pass class ImageStream(LRFStream): tag_map = { 0xF555: ['comment', 'P'], } imgext = {0x11: 'jpeg', 0x12: 'png', 0x13: 'bmp', 0x14: 'gif'} tag_map.update(LRFStream.tag_map) encoding = property(fget=lambda self : self.imgext[self.stream_flags & 0xFF].upper()) def end_stream(self, *args): LRFStream.end_stream(self, *args) self.file = str(self.id) + '.' + self.encoding.lower() self._document.image_map[self.id] = self def __unicode__(self): return u'\n'%\ (self.id, self.encoding, self.file) class Import(LRFStream): pass class Button(LRFObject): tag_map = { 0xF503: ['', 'do_ref_image'], 0xF561: ['button_flags','W'], #\n' return s refpage = property(fget=lambda self : self.jump_action(2)[0]) refobj = property(fget=lambda self : self.jump_action(2)[1]) class Window(LRFObject): pass class PopUpWin(LRFObject): pass class Sound(LRFObject): pass class SoundStream(LRFObject): pass class Font(LRFStream): tag_map = { 0xF559: ['fontfilename', 'P'], 0xF55D: ['fontfacename', 'P'], } tag_map.update(LRFStream.tag_map) data = property(fget=lambda self: self.stream) def end_stream(self, *args): LRFStream.end_stream(self, *args) self._document.font_map[self.fontfacename] = self self.file = self.fontfacename + '.ttf' def __unicode__(self): s = '\n'%\ (self.id, self.fontfilename, self.fontfacename, self.file) return s class ObjectInfo(LRFStream): pass class BookAttr(StyleObject, LRFObject): tag_map = { 0xF57B: ['page_tree_id', 'D'], 0xF5D8: ['', 'add_font'], 0xF5DA: ['setwaitprop', 'W', {1: 'replay', 2: 'noreplay'}], } tag_map.update(ruby_tags) tag_map.update(LRFObject.tag_map) binding_map = {1: 'Lr', 16 : 'Rl'} def __init__(self, document, stream, id, scramble_key, boundary): self.font_link_list = [] LRFObject.__init__(self, document, stream, id, scramble_key, boundary) def add_font(self, tag, f): self.font_link_list.append(tag.dword) def __unicode__(self): s = u'\n'%(self.id, self.id) s += u'\n'%(self._tags_to_xml(),) doc = self._document s += u'\n'%\ (self.binding_map[doc.binding], doc.dpi, doc.width, doc.height, doc.color_depth) for font in self._document.font_map.values(): s += unicode(font) s += '\n' return s class SimpleText(Text): pass class TocLabel(object): def __init__(self, refpage, refobject, label): self.refpage, self.refobject, self.label = refpage, refobject, label def __unicode__(self): return u'%s\n'%(self.refpage, self.refobject, self.label) class TOCObject(LRFStream): def initialize(self): stream = cStringIO.StringIO(self.stream) c = struct.unpack(" 0: refpage = struct.unpack("\n' for i in self: s += unicode(i) return s + '\n' object_map = [ None, #00 PageTree, #01 Page, #02 Header, #03 Footer, #04 PageAttr, #05 Block, #06 BlockAttr, #07 MiniPage, #08 None, #09 Text, #0A TextAttr, #0B Image, #0C Canvas, #0D ESound, #0E None, #0F None, #10 ImageStream,#11 Import, #12 Button, #13 Window, #14 PopUpWin, #15 Sound, #16 SoundStream,#17 None, #18 Font, #19 ObjectInfo, #1A None, #1B BookAttr, #1C SimpleText, #1D TOCObject, #1E ] def get_object(document, stream, id, offset, size, scramble_key): stream.seek(offset) start_tag = Tag(stream) if start_tag.id != 0xF500: raise LRFParseError('Bad object start') obj_id, obj_type = struct.unpack("