mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
LRF Input: Detect and work around LRF files that have deeply nested spans instead of crashing. Fixes #759680 (Conversion stack overflow - .lrf to .epub)
This commit is contained in:
parent
50acfc314d
commit
7db1276a6b
@ -6,8 +6,8 @@ __license__ = 'GPL v3'
|
|||||||
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
|
||||||
__docformat__ = 'restructuredtext en'
|
__docformat__ = 'restructuredtext en'
|
||||||
|
|
||||||
import os, textwrap, sys
|
import os, textwrap, sys, operator
|
||||||
from copy import deepcopy
|
from copy import deepcopy, copy
|
||||||
|
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
|
||||||
@ -149,9 +149,65 @@ class TextBlock(etree.XSLTExtension):
|
|||||||
self.root = root
|
self.root = root
|
||||||
self.parent = root
|
self.parent = root
|
||||||
self.add_text_to = (self.parent, 'text')
|
self.add_text_to = (self.parent, 'text')
|
||||||
|
self.fix_deep_nesting(node)
|
||||||
for child in node:
|
for child in node:
|
||||||
self.process_child(child)
|
self.process_child(child)
|
||||||
|
|
||||||
|
def fix_deep_nesting(self, node):
    """Flatten pathologically deep ``Span`` nesting below *node*, in place.

    Every ``Span`` descendant of *node* is measured by its depth, i.e. the
    number of elements on the path from the span up to the root of the
    tree (found by following ``getparent()`` until it returns ``None``),
    so the depth is relative to the whole tree, not to *node*.

    If no span reaches a depth of 500 the tree is left untouched.
    Otherwise a warning is logged and the spans are processed deepest
    first: each span, together with all of its following siblings, is
    moved out of its parent and re-inserted immediately after that parent,
    i.e. one level further up.  The parent's tail text is appended to the
    tail of the last moved element so that text order is preserved, and
    moved ``Span`` children inherit any attributes of a ``Span`` parent
    that they do not define themselves.

    NOTE(review): a ``Span`` that is a direct child of *node* is hoisted
    into *node*'s parent whenever *node* itself has a parent -- confirm
    that this is intended.

    :param node: element whose ``Span`` descendants are examined; it must
        provide ``xpath()`` and ``getparent()``.
    :return: ``None``; the tree is modified in place.
    """
    max_depth = 500

    def nesting_depth(elem):
        # Count elem itself plus every ancestor up to the tree root.
        levels = 1
        ancestor = elem.getparent()
        while ancestor is not None:
            levels += 1
            ancestor = ancestor.getparent()
        return levels

    # Measure every span once; the same list drives both the "is it too
    # deep?" check and the flattening pass below.
    spans = [(nesting_depth(span), span)
             for span in node.xpath('descendant::Span')]
    by_depth = operator.itemgetter(0)
    if not spans or max(spans, key=by_depth)[0] < max_depth:
        return

    self.log.warn('Found deeply nested spans. Flattening.')

    # Deepest spans first, using the depths measured before any element is
    # moved.  The sort is keyed on the depth only, so elements themselves
    # are never compared.
    spans.sort(key=by_depth, reverse=True)

    for span_depth, span in spans:
        if span_depth < 3:
            # Shallower spans have no grandparent to be hoisted into, and
            # the list is sorted, so nothing further can qualify.
            break

        parent = span.getparent()
        grandparent = parent.getparent()
        insert_at = grandparent.index(parent) + 1
        # The span and everything after it inside the parent move together.
        moved = list(parent)[parent.index(span):]

        # The parent's tail text must now follow the last moved element.
        moved[-1].tail = (moved[-1].tail or '') + (parent.tail or '')
        parent.tail = ''

        inherited = dict(parent.attrib) if parent.tag == 'Span' else {}
        for child in moved:
            parent.remove(child)
            if inherited and child.tag == "Span":
                # The child's own attributes win over the inherited ones.
                merged = dict(inherited)
                merged.update(child.attrib)
                child.attrib.update(merged)

        # Inserting in reverse at a fixed index keeps the original order.
        for child in reversed(moved):
            grandparent.insert(insert_at, child)
|
||||||
|
|
||||||
def add_text(self, text):
|
def add_text(self, text):
|
||||||
if text:
|
if text:
|
||||||
if getattr(self.add_text_to[0], self.add_text_to[1]) is None:
|
if getattr(self.add_text_to[0], self.add_text_to[1]) is None:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user