mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
HTML Input: switch to using a stack rather than recursion
Allows processing deeply nested input without running out of stack space. This is especially an issue on 64-bit Windows, where the stack is quite small. Fixes #1981438 [Private bug](https://bugs.launchpad.net/calibre/+bug/1981438)
This commit is contained in:
parent
390e47bb4d
commit
4656e79cba
@@ -175,25 +175,67 @@ class HTMLFile:
|
|||||||
return Link(url, self.base)
|
return Link(url, self.base)
|
||||||
|
|
||||||
|
|
||||||
def depth_first(root, flat, visited=None):
|
def depth_first(root, flat):
|
||||||
yield root
|
yield root
|
||||||
if visited is None:
|
visited = set()
|
||||||
visited = set()
|
|
||||||
visited.add(root)
|
visited.add(root)
|
||||||
for link in root.links:
|
from collections import deque
|
||||||
if link.path is not None and link not in visited:
|
stack = deque()
|
||||||
try:
|
|
||||||
index = flat.index(link)
|
def add_links_from(item):
|
||||||
except ValueError: # Can happen if max_levels is used
|
for link in reversed(item.links):
|
||||||
continue
|
if link.path is not None and link not in visited:
|
||||||
hf = flat[index]
|
stack.appendleft(link)
|
||||||
if hf not in visited:
|
|
||||||
yield hf
|
add_links_from(root)
|
||||||
visited.add(hf)
|
while stack:
|
||||||
for hf in depth_first(hf, flat, visited):
|
link = stack.popleft()
|
||||||
if hf not in visited:
|
try:
|
||||||
yield hf
|
index = flat.index(link)
|
||||||
visited.add(hf)
|
except ValueError: # Can happen if max_levels is used
|
||||||
|
continue
|
||||||
|
hf = flat[index]
|
||||||
|
if hf not in visited:
|
||||||
|
yield hf
|
||||||
|
visited.add(hf)
|
||||||
|
add_links_from(hf)
|
||||||
|
|
||||||
|
|
||||||
|
def find_tests():
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
class HF:
|
||||||
|
def __init__(self, path):
|
||||||
|
self.path = path
|
||||||
|
self.links = []
|
||||||
|
|
||||||
|
def a(self, hf):
|
||||||
|
self.links.append(hf)
|
||||||
|
return hf
|
||||||
|
|
||||||
|
def __eq__(self, other):
|
||||||
|
return self.path == getattr(other, 'path', other)
|
||||||
|
|
||||||
|
def __hash__(self):
|
||||||
|
return hash(self.path)
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return self.path
|
||||||
|
|
||||||
|
class TestHTMLInput(unittest.TestCase):
|
||||||
|
|
||||||
|
def test_depth_first(self):
|
||||||
|
root = HF('root')
|
||||||
|
a = root.a(HF('a'))
|
||||||
|
a1 = a.a(HF('a1'))
|
||||||
|
x = a1.a(HF('x'))
|
||||||
|
a2 = a.a(HF('a2'))
|
||||||
|
b = root.a(HF('b'))
|
||||||
|
b1 = b.a(HF('b1'))
|
||||||
|
flat = root, a, b, a1, a2, b1, x
|
||||||
|
self.assertEqual(tuple(depth_first(flat[0], flat)), (root, a, a1, x, a2, b, b1))
|
||||||
|
|
||||||
|
return unittest.defaultTestLoader.loadTestsFromTestCase(TestHTMLInput)
|
||||||
|
|
||||||
|
|
||||||
def traverse(path_to_html_file, max_levels=sys.maxsize, verbose=0, encoding=None):
|
def traverse(path_to_html_file, max_levels=sys.maxsize, verbose=0, encoding=None):
|
||||||
@@ -233,12 +275,7 @@ def traverse(path_to_html_file, max_levels=sys.maxsize, verbose=0, encoding=None
|
|||||||
hf.links.remove(link)
|
hf.links.remove(link)
|
||||||
|
|
||||||
next_level = list(nl)
|
next_level = list(nl)
|
||||||
orec = sys.getrecursionlimit()
|
return flat, list(depth_first(flat[0], flat))
|
||||||
sys.setrecursionlimit(500000)
|
|
||||||
try:
|
|
||||||
return flat, list(depth_first(flat[0], flat))
|
|
||||||
finally:
|
|
||||||
sys.setrecursionlimit(orec)
|
|
||||||
|
|
||||||
|
|
||||||
def get_filelist(htmlfile, dir, opts, log):
|
def get_filelist(htmlfile, dir, opts, log):
|
||||||
|
@@ -261,6 +261,8 @@ def find_tests(which_tests=None, exclude_tests=None):
|
|||||||
from calibre.gui2.viewer.annotations import find_tests
|
from calibre.gui2.viewer.annotations import find_tests
|
||||||
a(find_tests())
|
a(find_tests())
|
||||||
if ok('misc'):
|
if ok('misc'):
|
||||||
|
from calibre.ebooks.html.input import find_tests
|
||||||
|
a(find_tests())
|
||||||
from calibre.ebooks.metadata.test_author_sort import find_tests
|
from calibre.ebooks.metadata.test_author_sort import find_tests
|
||||||
a(find_tests())
|
a(find_tests())
|
||||||
from calibre.ebooks.metadata.tag_mapper import find_tests
|
from calibre.ebooks.metadata.tag_mapper import find_tests
|
||||||
|
Loading…
x
Reference in New Issue
Block a user