diff --git a/src/calibre/scraper/simple.py b/src/calibre/scraper/simple.py index a150d5c26a..35025b0011 100644 --- a/src/calibre/scraper/simple.py +++ b/src/calibre/scraper/simple.py @@ -12,7 +12,6 @@ from calibre.constants import iswindows from calibre.ptempfile import PersistentTemporaryFile from calibre.utils.filenames import retry_on_fail from calibre.utils.ipc.simple_worker import start_pipe_worker -from calibre.utils.resources import get_path as P def worker_main(source): @@ -136,43 +135,5 @@ def read_url(storage, url, timeout=60): return strip_encoding_declarations(scraper.fetch_url(url, timeout=timeout)) -def find_tests(): - import re - import unittest - - from lxml.html import fromstring, tostring - skip = '' - is_sanitized = 'libasan' in os.environ.get('LD_PRELOAD', '') - if is_sanitized: - skip = 'Skipping Scraper tests as ASAN is enabled' - elif 'SKIP_QT_BUILD_TEST' in os.environ: - skip = 'Skipping Scraper tests as it causes crashes in macOS VM' - - @unittest.skipIf(skip, skip) - class TestSimpleWebEngineScraper(unittest.TestCase): - - def test_dom_load(self): - from qt.core import QUrl - overseer = Overseer() - for f in ('book', 'nav'): - path = P(f'templates/new_{f}.html', allow_user_override=False) - url = QUrl.fromLocalFile(path) - html = overseer.fetch_url(url, 'test') - - def c(a): - ans = tostring(fromstring(a.encode('utf-8')), pretty_print=True, encoding='unicode') - return re.sub(r'\s+', ' ', ans) - with open(path, 'rb') as f: - raw = f.read().decode('utf-8') - self.assertEqual(c(html), c(raw)) - self.assertRaises(ValueError, overseer.fetch_url, 'file:///does-not-exist.html', 'test') - w = overseer.workers - self.assertEqual(len(w), 1) - del overseer - self.assertFalse(w) - - return unittest.defaultTestLoader.loadTestsFromTestCase(TestSimpleWebEngineScraper) - - if __name__ == '__main__': print(read_url([], sys.argv[-1])) diff --git a/src/calibre/scraper/test_fetch_backend.py b/src/calibre/scraper/test_fetch_backend.py new file mode 100644 index 0000000000..2f2b12d0fd --- /dev/null +++ b/src/calibre/scraper/test_fetch_backend.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python +# License: GPLv3 Copyright: 2024, Kovid Goyal + +import os +import re +import unittest + +from lxml.html import fromstring, tostring + +from calibre.utils.resources import get_path as P + +from .simple import Overseer + +skip = '' +is_sanitized = 'libasan' in os.environ.get('LD_PRELOAD', '') +if is_sanitized: + skip = 'Skipping Scraper tests as ASAN is enabled' +elif 'SKIP_QT_BUILD_TEST' in os.environ: + skip = 'Skipping Scraper tests as it causes crashes in macOS VM' + + +@unittest.skipIf(skip, skip) +class TestSimpleWebEngineScraper(unittest.TestCase): + + def test_dom_load(self): + from qt.core import QUrl + overseer = Overseer() + for f in ('book', 'nav'): + path = P(f'templates/new_{f}.html', allow_user_override=False) + url = QUrl.fromLocalFile(path) + html = overseer.fetch_url(url, 'test') + + def c(a): + ans = tostring(fromstring(a.encode('utf-8')), pretty_print=True, encoding='unicode') + return re.sub(r'\s+', ' ', ans) + with open(path, 'rb') as f: + raw = f.read().decode('utf-8') + self.assertEqual(c(html), c(raw)) + self.assertRaises(ValueError, overseer.fetch_url, 'file:///does-not-exist.html', 'test') + w = overseer.workers + self.assertEqual(len(w), 1) + del overseer + self.assertFalse(w) + + +def find_tests(): + return unittest.defaultTestLoader.loadTestsFromTestCase(TestSimpleWebEngineScraper) diff --git a/src/calibre/utils/run_tests.py b/src/calibre/utils/run_tests.py index 39760bb90b..cb0337d0fe 100644 --- a/src/calibre/utils/run_tests.py +++ b/src/calibre/utils/run_tests.py @@ -252,7 +252,7 @@ def find_tests(which_tests=None, exclude_tests=None): from calibre.utils.matcher import test a(test(return_tests=True)) if ok('scraper'): - from calibre.scraper.simple import find_tests + from calibre.scraper.test_fetch_backend import find_tests a(find_tests()) if ok('icu'): from calibre.utils.icu_test import find_tests