LIT Input: Switch to a non-recursive algorithm to allow conversion of LIT files with deeply nested markup. Fixes #909535 (Lit books that won't convert (no DRM))

This commit is contained in:
Kovid Goyal 2011-12-29 14:57:09 +05:30
parent 9e04a6a609
commit a1a3f1ac44

View File

@ -7,9 +7,7 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \ __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
'and Marshall T. Vandegrift <llasram@gmail.com>' 'and Marshall T. Vandegrift <llasram@gmail.com>'
import struct, os import struct, os, functools, re
import functools
import re
from urlparse import urldefrag from urlparse import urldefrag
from cStringIO import StringIO from cStringIO import StringIO
from urllib import unquote as urlunquote from urllib import unquote as urlunquote
@ -165,15 +163,27 @@ class UnBinary(object):
def __str__(self): def __str__(self):
return self.raw return self.raw
def binary_to_text(self, bin, buf, index=0, depth=0): def binary_to_text(self, bin, buf):
tag_name = current_map = None stack = [(0, None, None, 0, 0, False, False, 'text', 0)]
dynamic_tag = errors = 0 self.cpos = 0
in_censorship = is_goingdown = False while stack:
state = 'text' self.binary_to_text_inner(bin, buf, stack)
flags = 0 del self.cpos
while index < len(bin): def binary_to_text_inner(self, bin, buf, stack):
c, index = read_utf8_char(bin, index) (depth, tag_name, current_map, dynamic_tag, errors,
in_censorship, is_goingdown, state, flags) = stack.pop()
if state == 'close tag':
if not tag_name:
raise LitError('Tag ends before it begins.')
buf.write(encode(u''.join(('</', tag_name, '>'))))
dynamic_tag = 0
tag_name = None
state = 'text'
while self.cpos < len(bin):
c, self.cpos = read_utf8_char(bin, self.cpos)
oc = ord(c) oc = ord(c)
if state == 'text': if state == 'text':
@ -223,26 +233,28 @@ class UnBinary(object):
buf.write(encode(tag_name)) buf.write(encode(tag_name))
elif flags & FLAG_CLOSING: elif flags & FLAG_CLOSING:
if depth == 0: if depth == 0:
raise LitError('Extra closing tag') raise LitError('Extra closing tag %s at %d'%(tag_name,
return index self.cpos))
break
elif state == 'get attr': elif state == 'get attr':
in_censorship = False in_censorship = False
if oc == 0: if oc == 0:
state = 'text'
if not is_goingdown: if not is_goingdown:
tag_name = None tag_name = None
dynamic_tag = 0 dynamic_tag = 0
buf.write(' />') buf.write(' />')
else: else:
buf.write('>') buf.write('>')
index = self.binary_to_text(bin, buf, index, depth+1) frame = (depth, tag_name, current_map,
is_goingdown = False dynamic_tag, errors, in_censorship, False,
if not tag_name: 'close tag', flags)
raise LitError('Tag ends before it begins.') stack.append(frame)
buf.write(encode(u''.join(('</', tag_name, '>')))) frame = (depth+1, None, None, 0, 0,
dynamic_tag = 0 False, False, 'text', 0)
tag_name = None stack.append(frame)
state = 'text' break
else: else:
if oc == 0x8000: if oc == 0x8000:
state = 'get attr length' state = 'get attr length'
@ -278,7 +290,7 @@ class UnBinary(object):
state = 'get value' state = 'get value'
if oc == 0xffff: if oc == 0xffff:
continue continue
if count < 0 or count > (len(bin) - index): if count < 0 or count > (len(bin) - self.cpos):
raise LitError('Invalid character count %d' % count) raise LitError('Invalid character count %d' % count)
elif state == 'get value': elif state == 'get value':
@ -303,7 +315,7 @@ class UnBinary(object):
elif state == 'get custom length': elif state == 'get custom length':
count = oc - 1 count = oc - 1
if count <= 0 or count > len(bin)-index: if count <= 0 or count > len(bin)-self.cpos:
raise LitError('Invalid character count %d' % count) raise LitError('Invalid character count %d' % count)
dynamic_tag += 1 dynamic_tag += 1
state = 'get custom' state = 'get custom'
@ -318,7 +330,7 @@ class UnBinary(object):
elif state == 'get attr length': elif state == 'get attr length':
count = oc - 1 count = oc - 1
if count <= 0 or count > (len(bin) - index): if count <= 0 or count > (len(bin) - self.cpos):
raise LitError('Invalid character count %d' % count) raise LitError('Invalid character count %d' % count)
buf.write(' ') buf.write(' ')
state = 'get custom attr' state = 'get custom attr'
@ -332,7 +344,7 @@ class UnBinary(object):
elif state == 'get href length': elif state == 'get href length':
count = oc - 1 count = oc - 1
if count <= 0 or count > (len(bin) - index): if count <= 0 or count > (len(bin) - self.cpos):
raise LitError('Invalid character count %d' % count) raise LitError('Invalid character count %d' % count)
href = '' href = ''
state = 'get href' state = 'get href'
@ -348,7 +360,6 @@ class UnBinary(object):
path = urlnormalize(path) path = urlnormalize(path)
buf.write(encode(u'"%s"' % path)) buf.write(encode(u'"%s"' % path))
state = 'get attr' state = 'get attr'
return index
class DirectoryEntry(object): class DirectoryEntry(object):
@ -896,10 +907,3 @@ class LitReader(OEBReader):
Container = LitContainer Container = LitContainer
DEFAULT_PROFILE = 'MSReader' DEFAULT_PROFILE = 'MSReader'
try:
import psyco
psyco.bind(read_utf8_char)
psyco.bind(UnBinary.binary_to_text)
except ImportError:
pass