mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Move scraper test into its own module
This commit is contained in:
parent
68e3787076
commit
e62f7427b4
@ -12,7 +12,6 @@ from calibre.constants import iswindows
|
|||||||
from calibre.ptempfile import PersistentTemporaryFile
|
from calibre.ptempfile import PersistentTemporaryFile
|
||||||
from calibre.utils.filenames import retry_on_fail
|
from calibre.utils.filenames import retry_on_fail
|
||||||
from calibre.utils.ipc.simple_worker import start_pipe_worker
|
from calibre.utils.ipc.simple_worker import start_pipe_worker
|
||||||
from calibre.utils.resources import get_path as P
|
|
||||||
|
|
||||||
|
|
||||||
def worker_main(source):
|
def worker_main(source):
|
||||||
@ -136,43 +135,5 @@ def read_url(storage, url, timeout=60):
|
|||||||
return strip_encoding_declarations(scraper.fetch_url(url, timeout=timeout))
|
return strip_encoding_declarations(scraper.fetch_url(url, timeout=timeout))
|
||||||
|
|
||||||
|
|
||||||
def find_tests():
|
|
||||||
import re
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from lxml.html import fromstring, tostring
|
|
||||||
skip = ''
|
|
||||||
is_sanitized = 'libasan' in os.environ.get('LD_PRELOAD', '')
|
|
||||||
if is_sanitized:
|
|
||||||
skip = 'Skipping Scraper tests as ASAN is enabled'
|
|
||||||
elif 'SKIP_QT_BUILD_TEST' in os.environ:
|
|
||||||
skip = 'Skipping Scraper tests as it causes crashes in macOS VM'
|
|
||||||
|
|
||||||
@unittest.skipIf(skip, skip)
|
|
||||||
class TestSimpleWebEngineScraper(unittest.TestCase):
|
|
||||||
|
|
||||||
def test_dom_load(self):
|
|
||||||
from qt.core import QUrl
|
|
||||||
overseer = Overseer()
|
|
||||||
for f in ('book', 'nav'):
|
|
||||||
path = P(f'templates/new_{f}.html', allow_user_override=False)
|
|
||||||
url = QUrl.fromLocalFile(path)
|
|
||||||
html = overseer.fetch_url(url, 'test')
|
|
||||||
|
|
||||||
def c(a):
|
|
||||||
ans = tostring(fromstring(a.encode('utf-8')), pretty_print=True, encoding='unicode')
|
|
||||||
return re.sub(r'\s+', ' ', ans)
|
|
||||||
with open(path, 'rb') as f:
|
|
||||||
raw = f.read().decode('utf-8')
|
|
||||||
self.assertEqual(c(html), c(raw))
|
|
||||||
self.assertRaises(ValueError, overseer.fetch_url, 'file:///does-not-exist.html', 'test')
|
|
||||||
w = overseer.workers
|
|
||||||
self.assertEqual(len(w), 1)
|
|
||||||
del overseer
|
|
||||||
self.assertFalse(w)
|
|
||||||
|
|
||||||
return unittest.defaultTestLoader.loadTestsFromTestCase(TestSimpleWebEngineScraper)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(read_url([], sys.argv[-1]))
|
print(read_url([], sys.argv[-1]))
|
||||||
|
47
src/calibre/scraper/test_fetch_backend.py
Normal file
47
src/calibre/scraper/test_fetch_backend.py
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# License: GPLv3 Copyright: 2024, Kovid Goyal <kovid at kovidgoyal.net>
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
from lxml.html import fromstring, tostring
|
||||||
|
|
||||||
|
from calibre.utils.resources import get_path as P
|
||||||
|
|
||||||
|
from .simple import Overseer
|
||||||
|
|
||||||
|
skip = ''
|
||||||
|
is_sanitized = 'libasan' in os.environ.get('LD_PRELOAD', '')
|
||||||
|
if is_sanitized:
|
||||||
|
skip = 'Skipping Scraper tests as ASAN is enabled'
|
||||||
|
elif 'SKIP_QT_BUILD_TEST' in os.environ:
|
||||||
|
skip = 'Skipping Scraper tests as it causes crashes in macOS VM'
|
||||||
|
|
||||||
|
|
||||||
|
@unittest.skipIf(skip, skip)
|
||||||
|
class TestSimpleWebEngineScraper(unittest.TestCase):
|
||||||
|
|
||||||
|
def test_dom_load(self):
|
||||||
|
from qt.core import QUrl
|
||||||
|
overseer = Overseer()
|
||||||
|
for f in ('book', 'nav'):
|
||||||
|
path = P(f'templates/new_{f}.html', allow_user_override=False)
|
||||||
|
url = QUrl.fromLocalFile(path)
|
||||||
|
html = overseer.fetch_url(url, 'test')
|
||||||
|
|
||||||
|
def c(a):
|
||||||
|
ans = tostring(fromstring(a.encode('utf-8')), pretty_print=True, encoding='unicode')
|
||||||
|
return re.sub(r'\s+', ' ', ans)
|
||||||
|
with open(path, 'rb') as f:
|
||||||
|
raw = f.read().decode('utf-8')
|
||||||
|
self.assertEqual(c(html), c(raw))
|
||||||
|
self.assertRaises(ValueError, overseer.fetch_url, 'file:///does-not-exist.html', 'test')
|
||||||
|
w = overseer.workers
|
||||||
|
self.assertEqual(len(w), 1)
|
||||||
|
del overseer
|
||||||
|
self.assertFalse(w)
|
||||||
|
|
||||||
|
|
||||||
|
def find_tests():
|
||||||
|
return unittest.defaultTestLoader.loadTestsFromTestCase(TestSimpleWebEngineScraper)
|
@ -252,7 +252,7 @@ def find_tests(which_tests=None, exclude_tests=None):
|
|||||||
from calibre.utils.matcher import test
|
from calibre.utils.matcher import test
|
||||||
a(test(return_tests=True))
|
a(test(return_tests=True))
|
||||||
if ok('scraper'):
|
if ok('scraper'):
|
||||||
from calibre.scraper.simple import find_tests
|
from calibre.scraper.test_fetch_backend import find_tests
|
||||||
a(find_tests())
|
a(find_tests())
|
||||||
if ok('icu'):
|
if ok('icu'):
|
||||||
from calibre.utils.icu_test import find_tests
|
from calibre.utils.icu_test import find_tests
|
||||||
|
Loading…
x
Reference in New Issue
Block a user