mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Another refactoring of text stream processing to make it even faster and to correctly handle text tags outside <p> elements.
This commit is contained in:
parent
0fbf3cb29a
commit
5c369f7d99
@ -608,41 +608,45 @@ class Text(LRFStream):
|
|||||||
def add_text(self, text):
|
def add_text(self, text):
|
||||||
s = unicode(text, "utf-16-le")
|
s = unicode(text, "utf-16-le")
|
||||||
if s:
|
if s:
|
||||||
self.containers.append(s.translate(self.text_map))
|
self.content.append(s.translate(self.text_map))
|
||||||
|
|
||||||
def empty_containers(self):
|
def end_container(self, tag, stream):
|
||||||
|
self.content.append(None)
|
||||||
|
|
||||||
|
def start_para(self, tag, stream):
|
||||||
|
self.content.append(self.__class__.TextTag('P'))
|
||||||
|
|
||||||
|
def close_containers(self, start=0):
|
||||||
open_containers = 0
|
open_containers = 0
|
||||||
if len(self.containers) > 0 and isinstance(self.containers[-1], self.__class__.Span):
|
if len(self.content) > 0 and isinstance(self.content[-1], self.__class__.Span):
|
||||||
self.containers.pop()
|
self.content.pop()
|
||||||
while len(self.containers) > 0:
|
while start < len(self.content):
|
||||||
c = self.containers.popleft()
|
c = self.content[start]
|
||||||
self.content.append(c)
|
|
||||||
if c is None:
|
if c is None:
|
||||||
open_containers -= 1
|
open_containers -= 1
|
||||||
elif isinstance(c, self.__class__.TextTag) and not c.self_closing:
|
elif isinstance(c, self.__class__.TextTag) and not c.self_closing:
|
||||||
open_containers += 1
|
open_containers += 1
|
||||||
|
start += 1
|
||||||
self.content.extend(None for i in range(open_containers))
|
self.content.extend(None for i in range(open_containers))
|
||||||
|
|
||||||
|
|
||||||
def end_container(self, tag, stream):
|
|
||||||
self.containers.append(None)
|
|
||||||
|
|
||||||
def start_para(self, tag, stream):
|
|
||||||
self.empty_containers()
|
|
||||||
self.containers.append(self.__class__.TextTag('P'))
|
|
||||||
|
|
||||||
def end_para(self, tag, stream):
|
def end_para(self, tag, stream):
|
||||||
self.empty_containers()
|
i = len(self.content)-1
|
||||||
|
while i > -1:
|
||||||
|
if isinstance(self.content[i], Text.TextTag) and self.content[i].name == 'P':
|
||||||
|
break
|
||||||
|
i -= 1
|
||||||
|
self.close_containers(start=i)
|
||||||
|
|
||||||
def cr(self, tag, stream):
|
def cr(self, tag, stream):
|
||||||
self.containers.append(self.__class__.TextTag('CR', self_closing=True))
|
self.content.append(self.__class__.TextTag('CR', self_closing=True))
|
||||||
|
|
||||||
def char_button(self, tag, stream):
|
def char_button(self, tag, stream):
|
||||||
self.containers.append(self.__class__.TextTag(
|
self.content.append(self.__class__.TextTag(
|
||||||
'CharButton', attrs={'refobj':tag.dword}))
|
'CharButton', attrs={'refobj':tag.dword}))
|
||||||
|
|
||||||
def simple_container(self, tag, name):
|
def simple_container(self, tag, name):
|
||||||
self.containers.append(self.__class__.TextTag(name))
|
self.content.append(self.__class__.TextTag(name))
|
||||||
|
|
||||||
def empline(self, tag, stream):
|
def empline(self, tag, stream):
|
||||||
|
|
||||||
@ -671,11 +675,11 @@ class Text(LRFStream):
|
|||||||
except LRFParseError:
|
except LRFParseError:
|
||||||
stream.seek(oldpos)
|
stream.seek(oldpos)
|
||||||
|
|
||||||
self.containers.append(self.__class__.TextTag(
|
self.content.append(self.__class__.TextTag(
|
||||||
'EmpLine', attrs=attrs))
|
'EmpLine', attrs=attrs))
|
||||||
|
|
||||||
def space(self, tag, stream):
|
def space(self, tag, stream):
|
||||||
self.containers.append(self.__class__.TextTag('Space',
|
self.content.append(self.__class__.TextTag('Space',
|
||||||
attrs={'xsize':tag.sword},
|
attrs={'xsize':tag.sword},
|
||||||
self_closing=True))
|
self_closing=True))
|
||||||
|
|
||||||
@ -685,18 +689,17 @@ class Text(LRFStream):
|
|||||||
{'xsize': xsize, 'ysize': ysize, 'refobj':refobj,
|
{'xsize': xsize, 'ysize': ysize, 'refobj':refobj,
|
||||||
'adjustment':self.adjustment_map[adjustment]}, self_closing=True)
|
'adjustment':self.adjustment_map[adjustment]}, self_closing=True)
|
||||||
plot.refobj = self._document.objects[refobj]
|
plot.refobj = self._document.objects[refobj]
|
||||||
self.containers.append(plot)
|
self.content.append(plot)
|
||||||
|
|
||||||
def draw_char(self, tag, stream):
|
def draw_char(self, tag, stream):
|
||||||
self.containers.append(self.__class__.TextTag('DrawChar', {'line':tag.word}))
|
self.content.append(self.__class__.TextTag('DrawChar', {'line':tag.word}))
|
||||||
|
|
||||||
def box(self, tag, stream):
|
def box(self, tag, stream):
|
||||||
self.containers.append(self.__class__.TextTag('Box',
|
self.content.append(self.__class__.TextTag('Box',
|
||||||
{'linetype':self.linetype_map[tag.word]}))
|
{'linetype':self.linetype_map[tag.word]}))
|
||||||
|
|
||||||
def initialize(self):
|
def initialize(self):
|
||||||
self.content = collections.deque()
|
self.content = collections.deque()
|
||||||
self.containers = collections.deque()
|
|
||||||
stream = cStringIO.StringIO(self.stream)
|
stream = cStringIO.StringIO(self.stream)
|
||||||
length = len(self.stream)
|
length = len(self.stream)
|
||||||
style = self.style.as_dict()
|
style = self.style.as_dict()
|
||||||
@ -722,18 +725,18 @@ class Text(LRFStream):
|
|||||||
getattr(self, action[0])(tag, action[1])
|
getattr(self, action[0])(tag, action[1])
|
||||||
elif tag.id in TextAttr.tag_map: # A Span attribute
|
elif tag.id in TextAttr.tag_map: # A Span attribute
|
||||||
action = TextAttr.tag_map[tag.id]
|
action = TextAttr.tag_map[tag.id]
|
||||||
if len(self.containers) == 0:
|
if len(self.content) == 0:
|
||||||
current_style = style.copy()
|
current_style = style.copy()
|
||||||
name, val = action[0], LRFObject.tag_to_val(action, None, tag, None)
|
name, val = action[0], LRFObject.tag_to_val(action, None, tag, None)
|
||||||
if current_style[name] != val:
|
if current_style[name] != val:
|
||||||
# No existing Span
|
# No existing Span
|
||||||
if len(self.containers) > 0 and isinstance(self.containers[-1], self.__class__.Span):
|
if len(self.content) > 0 and isinstance(self.content[-1], self.__class__.Span):
|
||||||
self.containers[-1].attrs[name] = val
|
self.content[-1].attrs[name] = val
|
||||||
else:
|
else:
|
||||||
self.containers.append(self.__class__.Span('Span', {name:val}))
|
self.content.append(self.__class__.Span('Span', {name:val}))
|
||||||
current_style[name] = val
|
current_style[name] = val
|
||||||
if self.containers:
|
if len(self.content) > 0:
|
||||||
self.empty_containers()
|
self.close_containers()
|
||||||
self.stream = None
|
self.stream = None
|
||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
@ -743,7 +746,6 @@ class Text(LRFStream):
|
|||||||
if isinstance(c, basestring):
|
if isinstance(c, basestring):
|
||||||
s += c
|
s += c
|
||||||
elif c is None:
|
elif c is None:
|
||||||
if len(open_containers) > 0: #for malformed text streams like those produced by BookDesigner
|
|
||||||
p = open_containers.pop()
|
p = open_containers.pop()
|
||||||
nl = u'\n' if p.name == 'P' else u''
|
nl = u'\n' if p.name == 'P' else u''
|
||||||
s += nl + u'</%s>'%(p.name,) + nl
|
s += nl + u'</%s>'%(p.name,) + nl
|
||||||
|
Loading…
x
Reference in New Issue
Block a user