Another refactoring of text stream processing to make it even faster and to correctly handle text tags outside <p> elements.

This commit is contained in:
Kovid Goyal 2007-09-22 22:49:22 +00:00
parent 0fbf3cb29a
commit 5c369f7d99

View File

@ -608,41 +608,45 @@ class Text(LRFStream):
def add_text(self, text): def add_text(self, text):
s = unicode(text, "utf-16-le") s = unicode(text, "utf-16-le")
if s: if s:
self.containers.append(s.translate(self.text_map)) self.content.append(s.translate(self.text_map))
def empty_containers(self): def end_container(self, tag, stream):
self.content.append(None)
def start_para(self, tag, stream):
self.content.append(self.__class__.TextTag('P'))
def close_containers(self, start=0):
open_containers = 0 open_containers = 0
if len(self.containers) > 0 and isinstance(self.containers[-1], self.__class__.Span): if len(self.content) > 0 and isinstance(self.content[-1], self.__class__.Span):
self.containers.pop() self.content.pop()
while len(self.containers) > 0: while start < len(self.content):
c = self.containers.popleft() c = self.content[start]
self.content.append(c)
if c is None: if c is None:
open_containers -= 1 open_containers -= 1
elif isinstance(c, self.__class__.TextTag) and not c.self_closing: elif isinstance(c, self.__class__.TextTag) and not c.self_closing:
open_containers += 1 open_containers += 1
start += 1
self.content.extend(None for i in range(open_containers)) self.content.extend(None for i in range(open_containers))
def end_container(self, tag, stream):
self.containers.append(None)
def start_para(self, tag, stream):
self.empty_containers()
self.containers.append(self.__class__.TextTag('P'))
def end_para(self, tag, stream): def end_para(self, tag, stream):
self.empty_containers() i = len(self.content)-1
while i > -1:
if isinstance(self.content[i], Text.TextTag) and self.content[i].name == 'P':
break
i -= 1
self.close_containers(start=i)
def cr(self, tag, stream): def cr(self, tag, stream):
self.containers.append(self.__class__.TextTag('CR', self_closing=True)) self.content.append(self.__class__.TextTag('CR', self_closing=True))
def char_button(self, tag, stream): def char_button(self, tag, stream):
self.containers.append(self.__class__.TextTag( self.content.append(self.__class__.TextTag(
'CharButton', attrs={'refobj':tag.dword})) 'CharButton', attrs={'refobj':tag.dword}))
def simple_container(self, tag, name): def simple_container(self, tag, name):
self.containers.append(self.__class__.TextTag(name)) self.content.append(self.__class__.TextTag(name))
def empline(self, tag, stream): def empline(self, tag, stream):
@ -671,11 +675,11 @@ class Text(LRFStream):
except LRFParseError: except LRFParseError:
stream.seek(oldpos) stream.seek(oldpos)
self.containers.append(self.__class__.TextTag( self.content.append(self.__class__.TextTag(
'EmpLine', attrs=attrs)) 'EmpLine', attrs=attrs))
def space(self, tag, stream): def space(self, tag, stream):
self.containers.append(self.__class__.TextTag('Space', self.content.append(self.__class__.TextTag('Space',
attrs={'xsize':tag.sword}, attrs={'xsize':tag.sword},
self_closing=True)) self_closing=True))
@ -685,18 +689,17 @@ class Text(LRFStream):
{'xsize': xsize, 'ysize': ysize, 'refobj':refobj, {'xsize': xsize, 'ysize': ysize, 'refobj':refobj,
'adjustment':self.adjustment_map[adjustment]}, self_closing=True) 'adjustment':self.adjustment_map[adjustment]}, self_closing=True)
plot.refobj = self._document.objects[refobj] plot.refobj = self._document.objects[refobj]
self.containers.append(plot) self.content.append(plot)
def draw_char(self, tag, stream): def draw_char(self, tag, stream):
self.containers.append(self.__class__.TextTag('DrawChar', {'line':tag.word})) self.content.append(self.__class__.TextTag('DrawChar', {'line':tag.word}))
def box(self, tag, stream): def box(self, tag, stream):
self.containers.append(self.__class__.TextTag('Box', self.content.append(self.__class__.TextTag('Box',
{'linetype':self.linetype_map[tag.word]})) {'linetype':self.linetype_map[tag.word]}))
def initialize(self): def initialize(self):
self.content = collections.deque() self.content = collections.deque()
self.containers = collections.deque()
stream = cStringIO.StringIO(self.stream) stream = cStringIO.StringIO(self.stream)
length = len(self.stream) length = len(self.stream)
style = self.style.as_dict() style = self.style.as_dict()
@ -722,18 +725,18 @@ class Text(LRFStream):
getattr(self, action[0])(tag, action[1]) getattr(self, action[0])(tag, action[1])
elif tag.id in TextAttr.tag_map: # A Span attribute elif tag.id in TextAttr.tag_map: # A Span attribute
action = TextAttr.tag_map[tag.id] action = TextAttr.tag_map[tag.id]
if len(self.containers) == 0: if len(self.content) == 0:
current_style = style.copy() current_style = style.copy()
name, val = action[0], LRFObject.tag_to_val(action, None, tag, None) name, val = action[0], LRFObject.tag_to_val(action, None, tag, None)
if current_style[name] != val: if current_style[name] != val:
# No existing Span # No existing Span
if len(self.containers) > 0 and isinstance(self.containers[-1], self.__class__.Span): if len(self.content) > 0 and isinstance(self.content[-1], self.__class__.Span):
self.containers[-1].attrs[name] = val self.content[-1].attrs[name] = val
else: else:
self.containers.append(self.__class__.Span('Span', {name:val})) self.content.append(self.__class__.Span('Span', {name:val}))
current_style[name] = val current_style[name] = val
if self.containers: if len(self.content) > 0:
self.empty_containers() self.close_containers()
self.stream = None self.stream = None
def __unicode__(self): def __unicode__(self):
@ -743,7 +746,6 @@ class Text(LRFStream):
if isinstance(c, basestring): if isinstance(c, basestring):
s += c s += c
elif c is None: elif c is None:
if len(open_containers) > 0: #for malformed text streams like those produced by BookDesigner
p = open_containers.pop() p = open_containers.pop()
nl = u'\n' if p.name == 'P' else u'' nl = u'\n' if p.name == 'P' else u''
s += nl + u'</%s>'%(p.name,) + nl s += nl + u'</%s>'%(p.name,) + nl