diff --git a/src/calibre/ebooks/html/input.py b/src/calibre/ebooks/html/input.py index f25ab230c6..baf7794d0f 100644 --- a/src/calibre/ebooks/html/input.py +++ b/src/calibre/ebooks/html/input.py @@ -175,25 +175,67 @@ class HTMLFile: return Link(url, self.base) -def depth_first(root, flat, visited=None): +def depth_first(root, flat): yield root - if visited is None: - visited = set() + visited = set() visited.add(root) - for link in root.links: - if link.path is not None and link not in visited: - try: - index = flat.index(link) - except ValueError: # Can happen if max_levels is used - continue - hf = flat[index] - if hf not in visited: - yield hf - visited.add(hf) - for hf in depth_first(hf, flat, visited): - if hf not in visited: - yield hf - visited.add(hf) + from collections import deque + stack = deque() + + def add_links_from(item): + for link in reversed(item.links): + if link.path is not None and link not in visited: + stack.appendleft(link) + + add_links_from(root) + while stack: + link = stack.popleft() + try: + index = flat.index(link) + except ValueError: # Can happen if max_levels is used + continue + hf = flat[index] + if hf not in visited: + yield hf + visited.add(hf) + add_links_from(hf) + + +def find_tests(): + import unittest + + class HF: + def __init__(self, path): + self.path = path + self.links = [] + + def a(self, hf): + self.links.append(hf) + return hf + + def __eq__(self, other): + return self.path == getattr(other, 'path', other) + + def __hash__(self): + return hash(self.path) + + def __repr__(self): + return self.path + + class TestHTMLInput(unittest.TestCase): + + def test_depth_first(self): + root = HF('root') + a = root.a(HF('a')) + a1 = a.a(HF('a1')) + x = a1.a(HF('x')) + a2 = a.a(HF('a2')) + b = root.a(HF('b')) + b1 = b.a(HF('b1')) + flat = root, a, b, a1, a2, b1, x + self.assertEqual(tuple(depth_first(flat[0], flat)), (root, a, a1, x, a2, b, b1)) + + return unittest.defaultTestLoader.loadTestsFromTestCase(TestHTMLInput) def traverse(path_to_html_file, max_levels=sys.maxsize, verbose=0, encoding=None): @@ -233,12 +275,7 @@ def traverse(path_to_html_file, max_levels=sys.maxsize, verbose=0, encoding=None hf.links.remove(link) next_level = list(nl) - orec = sys.getrecursionlimit() - sys.setrecursionlimit(500000) - try: - return flat, list(depth_first(flat[0], flat)) - finally: - sys.setrecursionlimit(orec) + return flat, list(depth_first(flat[0], flat)) def get_filelist(htmlfile, dir, opts, log): diff --git a/src/calibre/utils/run_tests.py b/src/calibre/utils/run_tests.py index 5ba7cd9e06..27145d91e3 100644 --- a/src/calibre/utils/run_tests.py +++ b/src/calibre/utils/run_tests.py @@ -261,6 +261,8 @@ def find_tests(which_tests=None, exclude_tests=None): from calibre.gui2.viewer.annotations import find_tests a(find_tests()) if ok('misc'): + from calibre.ebooks.html.input import find_tests + a(find_tests()) from calibre.ebooks.metadata.test_author_sort import find_tests a(find_tests()) from calibre.ebooks.metadata.tag_mapper import find_tests