mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Improve support in lrfviewer for LRF files created by badly programmed software that puts Unicode strings directly into text streams. BookDesigner, I'm looking at you! lrfviewer now handles a much larger range of such Unicode characters in text streams.
This commit is contained in:
parent
87657c6fd4
commit
451986a9ce
@ -698,10 +698,7 @@ class Text(LRFStream):
|
||||
lineposition_map = {1:'before', 2:'after'}
|
||||
|
||||
def add_text(self, text):
|
||||
try:
|
||||
s = unicode(text, "utf-16-le")
|
||||
except UnicodeDecodeError: # Work around for Book Designer
|
||||
s = unicode(text+'\x00', 'utf-16-le')
|
||||
s = unicode(text, "utf-16-le")
|
||||
if s:
|
||||
s = s.translate(self.text_map)
|
||||
self.content.append(self.entity_pattern.sub(entity_to_unicode, s))
|
||||
@ -802,18 +799,39 @@ class Text(LRFStream):
|
||||
length = len(self.stream)
|
||||
style = self.style.as_dict()
|
||||
current_style = style.copy()
|
||||
text_tags = set(list(TextAttr.tag_map.keys()) + \
|
||||
list(Text.text_tags.keys()) + \
|
||||
list(ruby_tags.keys()))
|
||||
text_tags -= set([0xf500+i for i in range(10)])
|
||||
text_tags.add(0xf5cc)
|
||||
|
||||
while stream.tell() < length:
|
||||
|
||||
# Is there some text beofre a tag?
|
||||
pos = self.stream.find('\xf5', stream.tell()) - 1
|
||||
if pos > 0:
|
||||
self.add_text(self.stream[stream.tell():pos])
|
||||
stream.seek(pos)
|
||||
elif pos == -2: # No tags in this stream
|
||||
# Is there some text before a tag?
|
||||
def find_first_tag(start):
|
||||
pos = self.stream.find('\xf5', start)
|
||||
if pos == -1:
|
||||
return -1
|
||||
try:
|
||||
stream.seek(pos-1)
|
||||
_t = Tag(stream)
|
||||
if _t.id in text_tags:
|
||||
return pos-1
|
||||
return find_first_tag(pos+1)
|
||||
|
||||
|
||||
except:
|
||||
return find_first_tag(pos+1)
|
||||
|
||||
start_pos = stream.tell()
|
||||
tag_pos = find_first_tag(start_pos)
|
||||
if tag_pos >= start_pos:
|
||||
if tag_pos > start_pos:
|
||||
self.add_text(self.stream[start_pos:tag_pos])
|
||||
stream.seek(tag_pos)
|
||||
else: # No tags in this stream
|
||||
self.add_text(self.stream)
|
||||
stream.seek(0, 2)
|
||||
print repr(self.stream)
|
||||
break
|
||||
|
||||
tag = Tag(stream)
|
||||
@ -1170,7 +1188,6 @@ class TOCObject(LRFStream):
|
||||
refobj = struct.unpack("<I", stream.read(4))[0]
|
||||
cnt = struct.unpack("<H", stream.read(2))[0]
|
||||
raw = stream.read(cnt)
|
||||
print repr(raw)
|
||||
label = raw.decode('utf_16_le')
|
||||
self._contents.append(TocLabel(refpage, refobj, label))
|
||||
c -= 1
|
||||
|
Loading…
x
Reference in New Issue
Block a user