mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Move scraper test into its own module
This commit is contained in:
parent
68e3787076
commit
e62f7427b4
@ -12,7 +12,6 @@ from calibre.constants import iswindows
|
||||
from calibre.ptempfile import PersistentTemporaryFile
|
||||
from calibre.utils.filenames import retry_on_fail
|
||||
from calibre.utils.ipc.simple_worker import start_pipe_worker
|
||||
from calibre.utils.resources import get_path as P
|
||||
|
||||
|
||||
def worker_main(source):
|
||||
@ -136,43 +135,5 @@ def read_url(storage, url, timeout=60):
|
||||
return strip_encoding_declarations(scraper.fetch_url(url, timeout=timeout))
|
||||
|
||||
|
||||
def find_tests():
    '''
    Build the unittest suite for the simple scraper.

    The test case is marked as skipped when ``LD_PRELOAD`` mentions libasan
    or when ``SKIP_QT_BUILD_TEST`` is present in the environment.
    '''
    import re
    import unittest

    from lxml.html import fromstring, tostring

    if 'libasan' in os.environ.get('LD_PRELOAD', ''):
        skip_reason = 'Skipping Scraper tests as ASAN is enabled'
    elif 'SKIP_QT_BUILD_TEST' in os.environ:
        skip_reason = 'Skipping Scraper tests as it causes crashes in macOS VM'
    else:
        skip_reason = ''

    whitespace = re.compile(r'\s+')

    def canonical(markup):
        # Re-serialise through lxml and squash whitespace runs so that the
        # comparison ignores formatting differences between the two inputs.
        tree = fromstring(markup.encode('utf-8'))
        return whitespace.sub(' ', tostring(tree, pretty_print=True, encoding='unicode'))

    @unittest.skipIf(skip_reason, skip_reason)
    class TestSimpleWebEngineScraper(unittest.TestCase):

        def test_dom_load(self):
            from qt.core import QUrl
            overseer = Overseer()
            for stem in ('book', 'nav'):
                template = P(f'templates/new_{stem}.html', allow_user_override=False)
                rendered = overseer.fetch_url(QUrl.fromLocalFile(template), 'test')
                with open(template, 'rb') as stream:
                    on_disk = stream.read().decode('utf-8')
                self.assertEqual(canonical(rendered), canonical(on_disk))
            # A URL that cannot be loaded is expected to surface as ValueError
            self.assertRaises(ValueError, overseer.fetch_url, 'file:///does-not-exist.html', 'test')
            live_workers = overseer.workers
            self.assertEqual(len(live_workers), 1)
            # Dropping the only reference to the overseer must leave the
            # worker list empty.
            del overseer
            self.assertFalse(live_workers)

    return unittest.defaultTestLoader.loadTestsFromTestCase(TestSimpleWebEngineScraper)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Script mode: fetch the URL given as the last command line argument
    # and print the result returned by read_url().
    target_url = sys.argv[-1]
    print(read_url([], target_url))
|
||||
|
47
src/calibre/scraper/test_fetch_backend.py
Normal file
47
src/calibre/scraper/test_fetch_backend.py
Normal file
@ -0,0 +1,47 @@
|
||||
#!/usr/bin/env python
|
||||
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
||||
|
||||
import os
|
||||
import re
|
||||
import unittest
|
||||
|
||||
from lxml.html import fromstring, tostring
|
||||
|
||||
from calibre.utils.resources import get_path as P
|
||||
|
||||
from .simple import Overseer
|
||||
|
||||
# Decide once, at import time, whether the scraper tests should be skipped.
# ``skip`` is either empty (run the tests) or the reason for skipping them.
is_sanitized = 'libasan' in os.environ.get('LD_PRELOAD', '')
if is_sanitized:
    skip = 'Skipping Scraper tests as ASAN is enabled'
elif 'SKIP_QT_BUILD_TEST' in os.environ:
    skip = 'Skipping Scraper tests as it causes crashes in macOS VM'
else:
    skip = ''
|
||||
|
||||
|
||||
@unittest.skipIf(skip, skip)
class TestSimpleWebEngineScraper(unittest.TestCase):
    '''Tests for fetching pages through the scraper's Overseer.'''

    def test_dom_load(self):
        '''
        Fetch the bundled ``new_book`` and ``new_nav`` templates via file
        URLs and check that the returned HTML matches the files on disk
        once both are normalised, then check error reporting and worker
        cleanup.
        '''
        from qt.core import QUrl

        def normalized(markup):
            # Round-trip through lxml and collapse whitespace runs so only
            # the document structure and text are compared.
            serialized = tostring(fromstring(markup.encode('utf-8')), pretty_print=True, encoding='unicode')
            return re.sub(r'\s+', ' ', serialized)

        overseer = Overseer()
        for template in ('book', 'nav'):
            path = P(f'templates/new_{template}.html', allow_user_override=False)
            fetched = overseer.fetch_url(QUrl.fromLocalFile(path), 'test')
            with open(path, 'rb') as src:
                expected = src.read().decode('utf-8')
            self.assertEqual(normalized(fetched), normalized(expected))

        # Fetching a file that does not exist must raise ValueError
        self.assertRaises(ValueError, overseer.fetch_url, 'file:///does-not-exist.html', 'test')

        workers = overseer.workers
        self.assertEqual(len(workers), 1)
        # After the overseer is deleted its worker list is expected to be empty
        del overseer
        self.assertFalse(workers)
|
||||
|
||||
|
||||
def find_tests():
    '''Return a test suite containing the tests of TestSimpleWebEngineScraper.'''
    loader = unittest.defaultTestLoader
    return loader.loadTestsFromTestCase(TestSimpleWebEngineScraper)
|
@ -252,7 +252,7 @@ def find_tests(which_tests=None, exclude_tests=None):
|
||||
from calibre.utils.matcher import test
|
||||
a(test(return_tests=True))
|
||||
if ok('scraper'):
|
||||
from calibre.scraper.simple import find_tests
|
||||
from calibre.scraper.test_fetch_backend import find_tests
|
||||
a(find_tests())
|
||||
if ok('icu'):
|
||||
from calibre.utils.icu_test import find_tests
|
||||
|
Loading…
x
Reference in New Issue
Block a user