mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
LIT Input: Switch to non-recursive algorithm, to allow conversion of LIT files with deeply nested markup. Fixes #909535 (Lit books that wont convert (no DRM))
This commit is contained in:
parent
9e04a6a609
commit
a1a3f1ac44
@ -7,9 +7,7 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
|
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
|
||||||
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
'and Marshall T. Vandegrift <llasram@gmail.com>'
|
||||||
|
|
||||||
import struct, os
|
import struct, os, functools, re
|
||||||
import functools
|
|
||||||
import re
|
|
||||||
from urlparse import urldefrag
|
from urlparse import urldefrag
|
||||||
from cStringIO import StringIO
|
from cStringIO import StringIO
|
||||||
from urllib import unquote as urlunquote
|
from urllib import unquote as urlunquote
|
||||||
@ -165,15 +163,27 @@ class UnBinary(object):
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
return self.raw
|
return self.raw
|
||||||
|
|
||||||
def binary_to_text(self, bin, buf, index=0, depth=0):
|
def binary_to_text(self, bin, buf):
|
||||||
tag_name = current_map = None
|
stack = [(0, None, None, 0, 0, False, False, 'text', 0)]
|
||||||
dynamic_tag = errors = 0
|
self.cpos = 0
|
||||||
in_censorship = is_goingdown = False
|
while stack:
|
||||||
state = 'text'
|
self.binary_to_text_inner(bin, buf, stack)
|
||||||
flags = 0
|
del self.cpos
|
||||||
|
|
||||||
while index < len(bin):
|
def binary_to_text_inner(self, bin, buf, stack):
|
||||||
c, index = read_utf8_char(bin, index)
|
(depth, tag_name, current_map, dynamic_tag, errors,
|
||||||
|
in_censorship, is_goingdown, state, flags) = stack.pop()
|
||||||
|
|
||||||
|
if state == 'close tag':
|
||||||
|
if not tag_name:
|
||||||
|
raise LitError('Tag ends before it begins.')
|
||||||
|
buf.write(encode(u''.join(('</', tag_name, '>'))))
|
||||||
|
dynamic_tag = 0
|
||||||
|
tag_name = None
|
||||||
|
state = 'text'
|
||||||
|
|
||||||
|
while self.cpos < len(bin):
|
||||||
|
c, self.cpos = read_utf8_char(bin, self.cpos)
|
||||||
oc = ord(c)
|
oc = ord(c)
|
||||||
|
|
||||||
if state == 'text':
|
if state == 'text':
|
||||||
@ -223,26 +233,28 @@ class UnBinary(object):
|
|||||||
buf.write(encode(tag_name))
|
buf.write(encode(tag_name))
|
||||||
elif flags & FLAG_CLOSING:
|
elif flags & FLAG_CLOSING:
|
||||||
if depth == 0:
|
if depth == 0:
|
||||||
raise LitError('Extra closing tag')
|
raise LitError('Extra closing tag %s at %d'%(tag_name,
|
||||||
return index
|
self.cpos))
|
||||||
|
break
|
||||||
|
|
||||||
elif state == 'get attr':
|
elif state == 'get attr':
|
||||||
in_censorship = False
|
in_censorship = False
|
||||||
if oc == 0:
|
if oc == 0:
|
||||||
|
state = 'text'
|
||||||
if not is_goingdown:
|
if not is_goingdown:
|
||||||
tag_name = None
|
tag_name = None
|
||||||
dynamic_tag = 0
|
dynamic_tag = 0
|
||||||
buf.write(' />')
|
buf.write(' />')
|
||||||
else:
|
else:
|
||||||
buf.write('>')
|
buf.write('>')
|
||||||
index = self.binary_to_text(bin, buf, index, depth+1)
|
frame = (depth, tag_name, current_map,
|
||||||
is_goingdown = False
|
dynamic_tag, errors, in_censorship, False,
|
||||||
if not tag_name:
|
'close tag', flags)
|
||||||
raise LitError('Tag ends before it begins.')
|
stack.append(frame)
|
||||||
buf.write(encode(u''.join(('</', tag_name, '>'))))
|
frame = (depth+1, None, None, 0, 0,
|
||||||
dynamic_tag = 0
|
False, False, 'text', 0)
|
||||||
tag_name = None
|
stack.append(frame)
|
||||||
state = 'text'
|
break
|
||||||
else:
|
else:
|
||||||
if oc == 0x8000:
|
if oc == 0x8000:
|
||||||
state = 'get attr length'
|
state = 'get attr length'
|
||||||
@ -278,7 +290,7 @@ class UnBinary(object):
|
|||||||
state = 'get value'
|
state = 'get value'
|
||||||
if oc == 0xffff:
|
if oc == 0xffff:
|
||||||
continue
|
continue
|
||||||
if count < 0 or count > (len(bin) - index):
|
if count < 0 or count > (len(bin) - self.cpos):
|
||||||
raise LitError('Invalid character count %d' % count)
|
raise LitError('Invalid character count %d' % count)
|
||||||
|
|
||||||
elif state == 'get value':
|
elif state == 'get value':
|
||||||
@ -303,7 +315,7 @@ class UnBinary(object):
|
|||||||
|
|
||||||
elif state == 'get custom length':
|
elif state == 'get custom length':
|
||||||
count = oc - 1
|
count = oc - 1
|
||||||
if count <= 0 or count > len(bin)-index:
|
if count <= 0 or count > len(bin)-self.cpos:
|
||||||
raise LitError('Invalid character count %d' % count)
|
raise LitError('Invalid character count %d' % count)
|
||||||
dynamic_tag += 1
|
dynamic_tag += 1
|
||||||
state = 'get custom'
|
state = 'get custom'
|
||||||
@ -318,7 +330,7 @@ class UnBinary(object):
|
|||||||
|
|
||||||
elif state == 'get attr length':
|
elif state == 'get attr length':
|
||||||
count = oc - 1
|
count = oc - 1
|
||||||
if count <= 0 or count > (len(bin) - index):
|
if count <= 0 or count > (len(bin) - self.cpos):
|
||||||
raise LitError('Invalid character count %d' % count)
|
raise LitError('Invalid character count %d' % count)
|
||||||
buf.write(' ')
|
buf.write(' ')
|
||||||
state = 'get custom attr'
|
state = 'get custom attr'
|
||||||
@ -332,7 +344,7 @@ class UnBinary(object):
|
|||||||
|
|
||||||
elif state == 'get href length':
|
elif state == 'get href length':
|
||||||
count = oc - 1
|
count = oc - 1
|
||||||
if count <= 0 or count > (len(bin) - index):
|
if count <= 0 or count > (len(bin) - self.cpos):
|
||||||
raise LitError('Invalid character count %d' % count)
|
raise LitError('Invalid character count %d' % count)
|
||||||
href = ''
|
href = ''
|
||||||
state = 'get href'
|
state = 'get href'
|
||||||
@ -348,7 +360,6 @@ class UnBinary(object):
|
|||||||
path = urlnormalize(path)
|
path = urlnormalize(path)
|
||||||
buf.write(encode(u'"%s"' % path))
|
buf.write(encode(u'"%s"' % path))
|
||||||
state = 'get attr'
|
state = 'get attr'
|
||||||
return index
|
|
||||||
|
|
||||||
|
|
||||||
class DirectoryEntry(object):
|
class DirectoryEntry(object):
|
||||||
@ -896,10 +907,3 @@ class LitReader(OEBReader):
|
|||||||
Container = LitContainer
|
Container = LitContainer
|
||||||
DEFAULT_PROFILE = 'MSReader'
|
DEFAULT_PROFILE = 'MSReader'
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
import psyco
|
|
||||||
psyco.bind(read_utf8_char)
|
|
||||||
psyco.bind(UnBinary.binary_to_text)
|
|
||||||
except ImportError:
|
|
||||||
pass
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user