Move scraper test into its own module

This commit is contained in:
Kovid Goyal 2024-08-08 10:46:36 +05:30
parent 68e3787076
commit e62f7427b4
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
3 changed files with 48 additions and 40 deletions

View File

@ -12,7 +12,6 @@ from calibre.constants import iswindows
from calibre.ptempfile import PersistentTemporaryFile from calibre.ptempfile import PersistentTemporaryFile
from calibre.utils.filenames import retry_on_fail from calibre.utils.filenames import retry_on_fail
from calibre.utils.ipc.simple_worker import start_pipe_worker from calibre.utils.ipc.simple_worker import start_pipe_worker
from calibre.utils.resources import get_path as P
def worker_main(source): def worker_main(source):
@ -136,43 +135,5 @@ def read_url(storage, url, timeout=60):
return strip_encoding_declarations(scraper.fetch_url(url, timeout=timeout)) return strip_encoding_declarations(scraper.fetch_url(url, timeout=timeout))
def find_tests():
    """Build and return the unittest suite for the simple WebEngine scraper.

    The test case is defined inside this function so that the ``re``,
    ``unittest`` and ``lxml`` imports are only paid for when tests are
    actually collected.
    """
    import re
    import unittest

    from lxml.html import fromstring, tostring

    # An empty reason is falsy, so unittest.skipIf() leaves the tests enabled.
    if 'libasan' in os.environ.get('LD_PRELOAD', ''):
        skip = 'Skipping Scraper tests as ASAN is enabled'
    elif 'SKIP_QT_BUILD_TEST' in os.environ:
        skip = 'Skipping Scraper tests as it causes crashes in macOS VM'
    else:
        skip = ''

    def normalized(markup):
        # Round-trip through lxml and collapse whitespace runs so that only
        # structural differences between two documents make them unequal.
        serialized = tostring(fromstring(markup.encode('utf-8')), pretty_print=True, encoding='unicode')
        return re.sub(r'\s+', ' ', serialized)

    @unittest.skipIf(skip, skip)
    class TestSimpleWebEngineScraper(unittest.TestCase):

        def test_dom_load(self):
            from qt.core import QUrl
            overseer = Overseer()
            for template in ('book', 'nav'):
                path = P(f'templates/new_{template}.html', allow_user_override=False)
                html = overseer.fetch_url(QUrl.fromLocalFile(path), 'test')
                with open(path, 'rb') as src:
                    raw = src.read().decode('utf-8')
                # The fetched DOM must match the file on disk after normalization.
                self.assertEqual(normalized(html), normalized(raw))
            self.assertRaises(ValueError, overseer.fetch_url, 'file:///does-not-exist.html', 'test')
            # All fetches above used the same source name and must share one worker.
            live_workers = overseer.workers
            self.assertEqual(len(live_workers), 1)
            # Keep no other reference to the overseer: the check below expects
            # the workers container to be empty once the last one is dropped.
            del overseer
            self.assertFalse(live_workers)

    return unittest.defaultTestLoader.loadTestsFromTestCase(TestSimpleWebEngineScraper)
if __name__ == '__main__': if __name__ == '__main__':
print(read_url([], sys.argv[-1])) print(read_url([], sys.argv[-1]))

View File

@ -0,0 +1,47 @@
#!/usr/bin/env python
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
import os
import re
import unittest
from lxml.html import fromstring, tostring
from calibre.utils.resources import get_path as P
from .simple import Overseer
# Reason string passed to unittest.skipIf() on the test case below.
# An empty string is falsy, which leaves the tests enabled.
is_sanitized = 'libasan' in os.environ.get('LD_PRELOAD', '')
if is_sanitized:
    skip = 'Skipping Scraper tests as ASAN is enabled'
elif 'SKIP_QT_BUILD_TEST' in os.environ:
    skip = 'Skipping Scraper tests as it causes crashes in macOS VM'
else:
    skip = ''
@unittest.skipIf(skip, skip)
class TestSimpleWebEngineScraper(unittest.TestCase):
    """Check that the Overseer loads local HTML templates faithfully."""

    @staticmethod
    def _canonical_html(markup):
        # Round-trip through lxml and collapse whitespace runs so that only
        # structural differences between two documents make them unequal.
        serialized = tostring(fromstring(markup.encode('utf-8')), pretty_print=True, encoding='unicode')
        return re.sub(r'\s+', ' ', serialized)

    def test_dom_load(self):
        from qt.core import QUrl
        overseer = Overseer()
        for template in ('book', 'nav'):
            path = P(f'templates/new_{template}.html', allow_user_override=False)
            html = overseer.fetch_url(QUrl.fromLocalFile(path), 'test')
            with open(path, 'rb') as src:
                raw = src.read().decode('utf-8')
            # The fetched DOM must match the file on disk after normalization.
            self.assertEqual(self._canonical_html(html), self._canonical_html(raw))
        self.assertRaises(ValueError, overseer.fetch_url, 'file:///does-not-exist.html', 'test')
        # All fetches above used the same source name and must share one worker.
        live_workers = overseer.workers
        self.assertEqual(len(live_workers), 1)
        # Keep no other reference to the overseer: the check below expects
        # the workers container to be empty once the last one is dropped.
        del overseer
        self.assertFalse(live_workers)
def find_tests():
    """Return a suite holding every test defined on TestSimpleWebEngineScraper."""
    loader = unittest.defaultTestLoader
    return loader.loadTestsFromTestCase(TestSimpleWebEngineScraper)

View File

@ -252,7 +252,7 @@ def find_tests(which_tests=None, exclude_tests=None):
from calibre.utils.matcher import test from calibre.utils.matcher import test
a(test(return_tests=True)) a(test(return_tests=True))
if ok('scraper'): if ok('scraper'):
from calibre.scraper.simple import find_tests from calibre.scraper.test_fetch_backend import find_tests
a(find_tests()) a(find_tests())
if ok('icu'): if ok('icu'):
from calibre.utils.icu_test import find_tests from calibre.utils.icu_test import find_tests