diff --git a/src/libprs500/ebooks/lrf/objects.py b/src/libprs500/ebooks/lrf/objects.py index a7d28e99ac..4cc48ba12d 100644 --- a/src/libprs500/ebooks/lrf/objects.py +++ b/src/libprs500/ebooks/lrf/objects.py @@ -12,7 +12,7 @@ ## You should have received a copy of the GNU General Public License along ## with this program; if not, write to the Free Software Foundation, Inc., ## 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -import struct, array, zlib, cStringIO +import struct, array, zlib, cStringIO, collections from libprs500.ebooks.lrf import LRFParseError from libprs500.ebooks.lrf.tags import Tag @@ -512,13 +512,6 @@ class Block(LRFStream): if hasattr(self, attr): self.attrs[attr] = getattr(self, attr) - def __iter__(self): - try: - for i in iter(self.content): - yield i - except TypeError: - yield self.content - def __unicode__(self): s = u'\n<%s objid="%d" blockstyle="%d" '%(self.name, self.id, self.style_id) if hasattr(self, 'textstyle_id'): @@ -526,9 +519,7 @@ class Block(LRFStream): for attr in self.attrs: s += '%s="%s" '%(attr, self.attrs[attr]) s = s.rstrip()+'>\n' - if self.name != 'ImageBlock': - for i in self: - s += unicode(i) + s += unicode(self.content) s += '\n'%(self.name,) return s @@ -541,6 +532,8 @@ class MiniPage(LRFStream): tag_map.update(LRFStream.tag_map) tag_map.update(BlockAttr.tag_map) + + class Text(LRFStream): tag_map = { 0xF503: ['style_id', 'D'], @@ -550,8 +543,9 @@ class Text(LRFStream): style = property(fget=lambda self : self._document.objects[self.style_id]) - class Content(LRFContentObject): - tag_map = { + text_map = { 0x22: u'"', 0x26: u'&', 0x27: u'&squot;', 0x3c: u'<', 0x3e: u'>' } + + text_tags = { 0xF581: ['simple_container', 'Italic'], 0xF582: 'end_container', 0xF5B1: ['simple_container', 'Yoko'], @@ -585,194 +579,183 @@ class Text(LRFStream): 0xF5C6: 'box', 0xF5C7: 'end_container', 0xF5CA: 'space', - 0xF5CC: 'string', 0xF5D1: 'plot', 0xF5D2: 'cr', } + + class TextTag(object): - text_map = { 0x22: u'"', 0x26: u'&', 0x27: u'&squot;', 0x3c: u'<', 0x3e: u'>' } - linetype_map = {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted'} - adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'} - lineposition_map = {1:'before', 2:'after'} - - def __init__(self, bytes, objects, parent=None, name=None, attrs={}): - self.parent = parent + def __init__(self, name, attrs={}, self_closing=False): self.name = name self.attrs = attrs - LRFContentObject.__init__(self, bytes, objects) - - def parse_stream(self, length): - offset = self.stream.tell() - while self.in_container and offset < length: - buf = self.stream.getvalue()[offset:] - pos = buf.find('\xf5') - 1 - if pos > 0: - self.stream.seek(offset+pos) - self.add_text(buf[:pos]) - self.handle_tag(Tag(self.stream)) - offset = self.stream.tell() - - def handle_tag(self, tag): - if tag.id in self.tag_map: - action = self.tag_map[tag.id] - if isinstance(action, basestring): - func, args = action, tuple([]) - else: - func, args = action[0], (action[1],) - getattr(self, func)(tag, *args) - elif tag.id in TextAttr.tag_map: - h = TextAttr.tag_map[tag.id] - val = LRFObject.tag_to_val(h, None, tag, self.stream) - if self.name == 'Span': - if h[0] not in self.attrs: - self.attrs[h[0]] = val - elif val != self.attrs[h[0]]: - if self._contents: - self.parent._contents.append(self) - Text.Content(self.stream, self.objects, self.parent, - 'Span', {h[0]: val}) - - - else: - Text.Content(self.stream, self.objects, self, - 'Span', {h[0]: val}) - - else: - raise LRFParseError('Unknown tag in text stream %s'&(tag,)) - - - def simple_container(self, tag, name): - cont = Text.Content(self.stream, self.objects, parent=self, name=name) - self._contents.append(cont) - - def end_container(self, *args): - self.in_container = False - if self.name == 'Span' and self._contents and self not in self.parent._contents: - self.parent._contents.append(self) - - def end_to_root(self): - parent = self - while parent: - parent.end_container() - parent = parent.parent - - def root(self): - root = self - while root.parent: - root = root.parent - return root - - def start_para(self, tag): - self.end_to_root() - root = self.root() - root.in_container = True - - p = Text.Content(self.stream, self.objects, parent=root, name='P') - root._contents.append(p) - - def end_para(self, tag): - self.end_to_root() - root = self.root() - root.in_container = True - - def cr(self, tag): - self._contents.append(Text.Content('', self.objects, parent=self, name='CR')) - - def char_button(self, tag): - self._contents.append(Text.Content(self.stream, self.objects, parent=self, - name='CharButton', attrs={'refobj':tag.dword})) - - def empline(self, tag): - - def invalid(op): - self.stream.seek(op) - self.simple_container('EmpLine') - - oldpos = self.stream.tell() - try: - t = Tag(self.stream) - if t.id not in [0xF579, 0xF57A]: - raise LRFParseError - except LRFParseError: - invalid(oldpos) - return - h = TextAttr.tag_map[t.id] - attrs = {} - attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None) - oldpos = self.stream.tell() - try: - t = Tag(self.stream) - if t.id not in [0xF579, 0xF57A]: - raise LRFParseError - h = TextAttr.tag_map[t.id] - attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None) - except LRFParseError: - self.stream.seek(oldpos) - - cont = Text.Content(self.stream, self.objects, parent=self, - name='EmpLine', attrs=attrs) - self._contents.append(cont) - - def space(self, tag): - self._contents.append(Text.Content('', self.objects, parent=self, - name='Space', attrs={'xsize':tag.sword})) - - def string(self, tag): - strlen = tag.word - self.add_text(self.stream.read(strlen)) - - def add_text(self, text): - s = unicode(text, "utf-16-le") - self._contents.append(s.translate(self.text_map)) - - def plot(self, tag): - xsize, ysize, refobj, adjustment = struct.unpack("' - if self.name is None: - return children - return s + u'>' + children + ''%(self.name,) + ('\n' if self.name == 'P' else '') + s = u'<%s '%(self.name,) + for name, val in self.attrs.items(): + s += '%s="%s" '%(name, val) + return s.rstrip() + (u' />' if self.self_closing else u'>') + (u'\n' if self.name in ('P', 'CR') else u'') + + linetype_map = {0: 'none', 0x10: 'solid', 0x20: 'dashed', 0x30: 'double', 0x40: 'dotted'} + adjustment_map = {1: 'top', 2: 'center', 3: 'baseline', 4: 'bottom'} + lineposition_map = {1:'before', 2:'after'} + + + def add_text(self, text): + s = unicode(text, "utf-16-le") + if s: + self.containers.append(s.translate(self.text_map)) + + def empty_containers(self): + open_containers = 0 + while len(self.containers) > 0: + c = self.containers.popleft() + self.content.append(c) + if c is None: + open_containers -= 1 + elif isinstance(c, self.__class__.TextTag) and not c.self_closing: + open_containers += 1 + while open_containers > 0: + self.content.append(None) + open_containers -= 1 + + def end_container(self, tag, stream): + self.containers.append(None) + + def start_para(self, tag, stream): + self.empty_containers() + self.containers.append(self.__class__.TextTag('P')) - def __str__(self): - return unicode(self).encode('utf-8') + def end_para(self, tag, stream): + self.empty_containers() + + def cr(self, tag, stream): + self.containers.append(self.__class__.TextTag('CR', self_closing=True)) + + def char_button(self, tag, stream): + self.containers.append(self.__class__.TextTag( + 'CharButton', attrs={'refobj':tag.dword})) + + def simple_container(self, tag, name): + self.containers.append(self.__class__.TextTag(name)) + + def empline(self, tag, stream): + + def invalid(op): + stream.seek(op) + self.simple_container('EmpLine') + + oldpos = stream.tell() + try: + t = Tag(stream) + if t.id not in [0xF579, 0xF57A]: + raise LRFParseError + except LRFParseError: + invalid(oldpos) + return + h = TextAttr.tag_map[t.id] + attrs = {} + attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None) + oldpos = stream.tell() + try: + t = Tag(stream) + if t.id not in [0xF579, 0xF57A]: + raise LRFParseError + h = TextAttr.tag_map[t.id] + attrs[h[0]] = TextAttr.tag_to_val(h, None, t, None) + except LRFParseError: + stream.seek(oldpos) + + self.containers.append(self.__class__.TextTag( + 'EmpLine', attrs=attrs)) + + def space(self, tag, stream): + self.containers.append(self.__class__.TextTag('Space', + attrs={'xsize':tag.sword}, + self_closing=True)) + + def plot(self, tag, stream): + xsize, ysize, refobj, adjustment = struct.unpack(" 0: + self.add_text(self.stream[stream.tell():pos]) + stream.seek(pos) + + tag = Tag(stream) + + if tag.id == 0xF5CC: + self.add_text(stream.read(tag.word)) + elif tag.id in self.__class__.text_tags: # A Text tag + action = self.__class__.text_tags[tag.id] + if isinstance(action, basestring): + getattr(self, action)(tag, stream) + else: + getattr(self, action[0])(tag, action[1]) + elif tag.id in TextAttr.tag_map: # A Span attribute + action = TextAttr.tag_map[tag.id] + if len(self.containers) == 0: + previous_span = None + name, val = action[0], LRFObject.tag_to_val(action, None, tag, None) + if previous_span is None: + # No existing Span so start a new one + previous_span = self.__class__.TextTag('Span', {name:val}) + self.containers.append(previous_span) + else: + # Already in a Span + if name in previous_span.attrs: + # Start new Span + if hasattr(self.containers[-1], 'name') and self.containers[-1].name == 'Span': + self.containers.pop() + else: + self.empty_containers() + previous_span = self.__class__.TextTag('Span', {name:val}) + self.containers.append(previous_span) + else: + # Add attribute to current span + previous_span.attrs[name] = val + + self.stream = None + + def __unicode__(self): + s = u'' + open_containers = collections.deque() + for c in self.content: + if isinstance(c, basestring): + s += c + elif c is None: + p = open_containers.pop() + s += u''%(p.name,) + else: + s += unicode(c) + if not c.self_closing: + open_containers.append(c) + + if len(open_containers) > 0: + raise LRFParseError('Malformed text stream') + return s class Image(LRFObject):