LIT Input: Switch to non-recursive algorithm, to allow conversion of LIT files with deeply nested markup. Fixes #909535 (Lit books that wont convert (no DRM))

2025-07-09 03:04:10 -04:00 · 2011-12-29 14:57:09 +05:30 · 2011-12-29 14:57:09 +05:30 · a1a3f1ac44
commit a1a3f1ac44
parent 9e04a6a609
1 changed files with 37 additions and 33 deletions
--- a/src/calibre/ebooks/lit/reader.py
+++ b/src/calibre/ebooks/lit/reader.py
@ -7,9 +7,7 @@ __license__   = 'GPL v3'
 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net> ' \
    'and Marshall T. Vandegrift <llasram@gmail.com>'

-import struct, os
-import functools
-import re
+import struct, os, functools, re
 from urlparse import urldefrag
 from cStringIO import StringIO
 from urllib import unquote as urlunquote
@ -165,15 +163,27 @@ class UnBinary(object):
    def __str__(self):
        return self.raw

-    def binary_to_text(self, bin, buf, index=0, depth=0):
-        tag_name = current_map = None
-        dynamic_tag = errors = 0
-        in_censorship = is_goingdown = False
-        state = 'text'
-        flags = 0
+    def binary_to_text(self, bin, buf):
+        stack = [(0, None, None, 0, 0, False, False, 'text', 0)]
+        self.cpos = 0
+        while stack:
+            self.binary_to_text_inner(bin, buf, stack)
+        del self.cpos

-        while index < len(bin):
-            c, index = read_utf8_char(bin, index)
+    def binary_to_text_inner(self, bin, buf, stack):
+        (depth, tag_name, current_map, dynamic_tag, errors,
+                in_censorship, is_goingdown, state, flags) = stack.pop()
+
+        if state == 'close tag':
+            if not tag_name:
+                raise LitError('Tag ends before it begins.')
+            buf.write(encode(u''.join(('</', tag_name, '>'))))
+            dynamic_tag = 0
+            tag_name = None
+            state = 'text'
+
+        while self.cpos < len(bin):
+            c, self.cpos = read_utf8_char(bin, self.cpos)
            oc = ord(c)

            if state == 'text':
@ -223,26 +233,28 @@ class UnBinary(object):
                    buf.write(encode(tag_name))
                elif flags & FLAG_CLOSING:
                    if depth == 0:
-                        raise LitError('Extra closing tag')
-                    return index
+                        raise LitError('Extra closing tag %s at %d'%(tag_name,
+                            self.cpos))
+                    break

            elif state == 'get attr':
                in_censorship = False
                if oc == 0:
+                    state = 'text'
                    if not is_goingdown:
                        tag_name = None
                        dynamic_tag = 0
                        buf.write(' />')
                    else:
                        buf.write('>')
-                        index = self.binary_to_text(bin, buf, index, depth+1)
-                        is_goingdown = False
-                        if not tag_name:
-                            raise LitError('Tag ends before it begins.')
-                        buf.write(encode(u''.join(('</', tag_name, '>'))))
-                        dynamic_tag = 0
-                        tag_name = None
-                    state = 'text'
+                        frame = (depth, tag_name, current_map,
+                            dynamic_tag, errors, in_censorship, False,
+                            'close tag', flags)
+                        stack.append(frame)
+                        frame = (depth+1, None, None, 0, 0,
+                                False, False, 'text', 0)
+                        stack.append(frame)
+                        break
                else:
                    if oc == 0x8000:
                        state = 'get attr length'
@ -278,7 +290,7 @@ class UnBinary(object):
                state = 'get value'
                if oc == 0xffff:
                    continue
-                if count < 0 or count > (len(bin) - index):
+                if count < 0 or count > (len(bin) - self.cpos):
                    raise LitError('Invalid character count %d' % count)

            elif state == 'get value':
@ -303,7 +315,7 @@ class UnBinary(object):

            elif state == 'get custom length':
                count = oc - 1
-                if count <= 0 or count > len(bin)-index:
+                if count <= 0 or count > len(bin)-self.cpos:
                    raise LitError('Invalid character count %d' % count)
                dynamic_tag += 1
                state = 'get custom'
@ -318,7 +330,7 @@ class UnBinary(object):

            elif state == 'get attr length':
                count = oc - 1
-                if count <= 0 or count > (len(bin) - index):
+                if count <= 0 or count > (len(bin) - self.cpos):
                    raise LitError('Invalid character count %d' % count)
                buf.write(' ')
                state = 'get custom attr'
@ -332,7 +344,7 @@ class UnBinary(object):

            elif state == 'get href length':
                count = oc - 1
-                if count <= 0 or count > (len(bin) - index):
+                if count <= 0 or count > (len(bin) - self.cpos):
                    raise LitError('Invalid character count %d' % count)
                href = ''
                state = 'get href'
@ -348,7 +360,6 @@ class UnBinary(object):
                    path = urlnormalize(path)
                    buf.write(encode(u'"%s"' % path))
                    state = 'get attr'
-        return index


 class DirectoryEntry(object):
@ -896,10 +907,3 @@ class LitReader(OEBReader):
    Container = LitContainer
    DEFAULT_PROFILE = 'MSReader'

-
-try:
-    import psyco
-    psyco.bind(read_utf8_char)
-    psyco.bind(UnBinary.binary_to_text)
-except ImportError:
-    pass