diff --git a/src/calibre/scraper/simple.py b/src/calibre/scraper/simple.py
index f8325fbd1e..066d43f8aa 100644
--- a/src/calibre/scraper/simple.py
+++ b/src/calibre/scraper/simple.py
@@ -5,139 +5,15 @@
 
 import json
 import os
-import secrets
 import sys
-import time
-from functools import lru_cache
-from qt.core import QApplication, QEventLoop, QLoggingCategory, QUrl
-from qt.webengine import QWebEnginePage, QWebEngineProfile, QWebEngineSettings
-from threading import Lock
+import weakref
+from qt.core import QLoggingCategory, QUrl
+from threading import Lock, Thread
 
-from calibre.constants import cache_dir, iswindows
-from calibre.gui2.webengine import create_script, insert_scripts
+from calibre.constants import iswindows
 from calibre.ptempfile import PersistentTemporaryFile
-from calibre.utils.ipc.simple_worker import start_pipe_worker
 from calibre.utils.filenames import retry_on_fail
-
-
-def canonicalize_qurl(qurl):
-    qurl = qurl.adjusted(QUrl.UrlFormattingOption.StripTrailingSlash | QUrl.UrlFormattingOption.NormalizePathSegments)
-    if qurl.path() == '/':
-        qurl = qurl.adjusted(QUrl.UrlFormattingOption.RemovePath)
-    return qurl
-
-
-@lru_cache(maxsize=None)
-def create_profile(cache_name='simple', allow_js=False):
-    from calibre.utils.random_ua import random_common_chrome_user_agent
-    ans = QWebEngineProfile(cache_name, QApplication.instance())
-    ans.setHttpUserAgent(random_common_chrome_user_agent())
-    ans.setHttpCacheMaximumSize(0)  # managed by webengine
-    ans.setCachePath(os.path.join(cache_dir(), 'scraper', cache_name))
-    s = ans.settings()
-    a = s.setAttribute
-    a(QWebEngineSettings.WebAttribute.PluginsEnabled, False)
-    a(QWebEngineSettings.WebAttribute.JavascriptEnabled, allow_js)
-    s.setUnknownUrlSchemePolicy(QWebEngineSettings.UnknownUrlSchemePolicy.DisallowUnknownUrlSchemes)
-    a(QWebEngineSettings.WebAttribute.JavascriptCanOpenWindows, False)
-    a(QWebEngineSettings.WebAttribute.JavascriptCanAccessClipboard, False)
-    # ensure javascript cannot read from local files
-    a(QWebEngineSettings.WebAttribute.LocalContentCanAccessFileUrls, False)
-    a(QWebEngineSettings.WebAttribute.AllowWindowActivationFromJavaScript, False)
-    js = P('scraper.js', allow_user_override=False, data=True).decode('utf-8')
-    ans.token = secrets.token_hex()
-    js = js.replace('TOKEN', ans.token)
-    insert_scripts(ans, create_script('scraper.js', js))
-    return ans
-
-
-class SimpleScraper(QWebEnginePage):
-
-    def __init__(self, source, parent=None):
-        profile = create_profile(source)
-        self.token = profile.token
-        self.is_being_tested = source == 'test'
-        super().__init__(profile, parent)
-        self.setAudioMuted(True)
-        self.loadStarted.connect(self.load_started)
-        self.loadFinished.connect(self.load_finished)
-        self.loadProgress.connect(self.load_progress)
-
-    def print(self, *a):
-        print(*a, file=sys.stderr)
-
-    def load_started(self):
-        if self.is_being_tested:
-            self.print(f'load_started: {self.is_current_url=} {self.requestedUrl()=}')
-        if self.is_current_url:
-            self.current_fetch['load_started'] = True
-
-    def load_finished(self, ok):
-        if self.is_being_tested:
-            self.print(f'load_finished: {ok=} {self.is_current_url=}')
-        if self.is_current_url:
-            self.current_fetch['load_finished'] = True
-            self.current_fetch['load_was_ok'] = ok
-            if not ok and self.is_current_url:
-                self.current_fetch['working'] = False
-
-    def load_progress(self, progress):
-        if self.is_being_tested:
-            self.print(f'load_progress: {progress=} {self.is_current_url=}')
-        if self.is_current_url:
-            self.current_fetch['end_time'] = time.monotonic() + self.current_fetch['timeout']
-
-    def javaScriptAlert(self, url, msg):
-        pass
-
-    def javaScriptConfirm(self, url, msg):
-        return True
-
-    def javaScriptPrompt(self, url, msg, defval):
-        return True, defval
-
-    @property
-    def is_current_url(self):
-        if not hasattr(self, 'current_fetch'):
-            return False
-        return canonicalize_qurl(self.requestedUrl()) == self.current_fetch['fetching_url']
-
-    def javaScriptConsoleMessage(self, level, message, line_num, source_id):
-        parts = message.split(maxsplit=1)
-        if len(parts) == 2 and parts[0] == self.token:
-            msg = json.loads(parts[1])
-            t = msg.get('type')
-            if t == 'print':
-                print(msg['text'], file=sys.stderr)
-            elif t == 'domready':
-                if self.is_being_tested:
-                    self.print(f'domready: {self.is_current_url=}')
-                if self.is_current_url:
-                    self.current_fetch['working'] = False
-                    if not msg.get('failed'):
-                        self.current_fetch['html'] = msg['html']
-
-    def fetch(self, url_or_qurl, timeout=60):
-        fetching_url = QUrl(url_or_qurl)
-        self.current_fetch = {
-            'timeout': timeout, 'end_time': time.monotonic() + timeout,
-            'fetching_url': canonicalize_qurl(fetching_url), 'working': True,
-            'load_started': False
-        }
-        self.load(fetching_url)
-        try:
-            app = QApplication.instance()
-            while self.current_fetch['working'] and time.monotonic() < self.current_fetch['end_time']:
-                app.processEvents(QEventLoop.ProcessEventsFlag.ExcludeUserInputEvents)
-            ans = self.current_fetch.get('html')
-            if ans is None:
-                eurl = fetching_url.toString()
-                if self.current_fetch['working']:
-                    raise TimeoutError(f'Timed out loading HTML from: {eurl}')
-                raise ValueError(f'Failed to load HTML from: {eurl}')
-            return ans
-        finally:
-            del self.current_fetch
+from calibre.utils.ipc.simple_worker import start_pipe_worker
 
 
 def worker_main(source):
@@ -145,6 +21,8 @@ def worker_main(source):
 qt.webenginecontext.info=false
 ''')
     from calibre.gui2 import must_use_qt
+
+    from .simple_backend import SimpleScraper
     must_use_qt()
     s = SimpleScraper(source)
     for line in sys.stdin.buffer:
@@ -170,11 +48,15 @@ qt.webenginecontext.info=false
             print(json.dumps(result), flush=True)
 
 
+overseers = []
+
+
 class Overseer:
 
     def __init__(self):
         self.lock = Lock()
         self.workers = {}
+        overseers.append(weakref.ref(self))
 
     def worker_for_source(self, source):
         with self.lock:
@@ -212,12 +94,29 @@ class Overseer:
                         if w.wait(0.1) is None:
                             w.kill()
             self.workers.clear()
+    close = __del__
+
+
+def cleanup_overseers():
+    # Close each still-alive Overseer on its own thread; the returned callable joins those threads
+    closers = []
+    for ref in overseers:
+        overseer = ref()
+        if overseer is not None:
+            closers.append(Thread(target=overseer.close, name='CloseOverSeer'))
+            closers[-1].start()
+    overseers.clear()
+
+    def join_all():
+        for closer in closers:
+            closer.join()
+    return join_all
 
 
 def find_tests():
+    import re
     import unittest
     from lxml.html import fromstring, tostring
-    import re
     skip = ''
     is_sanitized = 'libasan' in os.environ.get('LD_PRELOAD', '')
     if is_sanitized:
diff --git a/src/calibre/scraper/simple_backend.py b/src/calibre/scraper/simple_backend.py
new file mode 100644
index 0000000000..b4d3312589
--- /dev/null
+++ b/src/calibre/scraper/simple_backend.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+# License: GPL v3 Copyright: 2022, Kovid Goyal <kovid at kovidgoyal.net>
+
+import json
+import os
+import secrets
+import sys
+import time
+from functools import lru_cache
+from qt.core import QApplication, QEventLoop, QUrl
+from qt.webengine import QWebEnginePage, QWebEngineProfile, QWebEngineSettings
+
+from calibre.constants import cache_dir
+from calibre.gui2.webengine import create_script, insert_scripts
+
+
+def canonicalize_qurl(qurl):
+    # Strip the trailing slash and normalize path segments; a path of exactly '/' is removed altogether
+    opts = QUrl.UrlFormattingOption
+    normalized = qurl.adjusted(opts.StripTrailingSlash | opts.NormalizePathSegments)
+    return normalized.adjusted(opts.RemovePath) if normalized.path() == '/' else normalized
+
+
+@lru_cache(maxsize=None)
+def create_profile(cache_name='simple', allow_js=False):
+    # Create the QWebEngineProfile used for scraping; lru_cache hands back the same object for repeated arguments
+    from calibre.utils.random_ua import random_common_chrome_user_agent
+    profile = QWebEngineProfile(cache_name, QApplication.instance())
+    profile.setHttpUserAgent(random_common_chrome_user_agent())
+    profile.setHttpCacheMaximumSize(0)  # managed by webengine
+    profile.setCachePath(os.path.join(cache_dir(), 'scraper', cache_name))
+    settings = profile.settings()
+    attr = QWebEngineSettings.WebAttribute
+    settings.setAttribute(attr.PluginsEnabled, False)
+    settings.setAttribute(attr.JavascriptEnabled, allow_js)
+    settings.setUnknownUrlSchemePolicy(QWebEngineSettings.UnknownUrlSchemePolicy.DisallowUnknownUrlSchemes)
+    # page scripts may not open windows, use the clipboard, read local files or activate windows
+    for flag in (attr.JavascriptCanOpenWindows, attr.JavascriptCanAccessClipboard,
+                 attr.LocalContentCanAccessFileUrls, attr.AllowWindowActivationFromJavaScript):
+        settings.setAttribute(flag, False)
+    # the text TOKEN in scraper.js is replaced by this profile's random token before the script is installed
+    script = P('scraper.js', allow_user_override=False, data=True).decode('utf-8')
+    profile.token = secrets.token_hex()
+    insert_scripts(profile, create_script('scraper.js', script.replace('TOKEN', profile.token)))
+    return profile
+
+
+class SimpleScraper(QWebEnginePage):  # web page that fetches one URL at a time and captures the HTML sent back in a token-prefixed console message
+
+    def __init__(self, source, parent=None):  # source is passed to create_profile(); the value 'test' turns on tracing to stderr
+        profile = create_profile(source)
+        self.token = profile.token  # prefix that javaScriptConsoleMessage requires before it acts on a message
+        self.is_being_tested = source == 'test'
+        super().__init__(profile, parent)
+        self.setAudioMuted(True)
+        self.loadStarted.connect(self.load_started)
+        self.loadFinished.connect(self.load_finished)
+        self.loadProgress.connect(self.load_progress)
+
+    def print(self, *a):  # print to stderr instead of stdout
+        print(*a, file=sys.stderr)
+
+    def load_started(self):  # loadStarted handler: flag the active fetch as started
+        if self.is_being_tested:
+            self.print(f'load_started: {self.is_current_url=} {self.requestedUrl()=}')
+        if self.is_current_url:
+            self.current_fetch['load_started'] = True
+
+    def load_finished(self, ok):  # loadFinished handler: record the outcome; a failed load ends the wait loop in fetch()
+        if self.is_being_tested:
+            self.print(f'load_finished: {ok=} {self.is_current_url=}')
+        if self.is_current_url:
+            self.current_fetch['load_finished'] = True
+            self.current_fetch['load_was_ok'] = ok
+            if not ok and self.is_current_url:  # is_current_url was already checked by the enclosing if
+                self.current_fetch['working'] = False
+
+    def load_progress(self, progress):  # loadProgress handler: any progress moves the deadline to now + timeout
+        if self.is_being_tested:
+            self.print(f'load_progress: {progress=} {self.is_current_url=}')
+        if self.is_current_url:
+            self.current_fetch['end_time'] = time.monotonic() + self.current_fetch['timeout']
+
+    def javaScriptAlert(self, url, msg):  # alert requests from the page are ignored
+        pass
+
+    def javaScriptConfirm(self, url, msg):  # confirm requests are always answered with True
+        return True
+
+    def javaScriptPrompt(self, url, msg, defval):  # prompt requests are always accepted with the default value
+        return True, defval
+
+    @property
+    def is_current_url(self):  # True only while a fetch is active and requestedUrl() canonicalizes to the URL being fetched
+        if not hasattr(self, 'current_fetch'):
+            return False
+        return canonicalize_qurl(self.requestedUrl()) == self.current_fetch['fetching_url']
+
+    def javaScriptConsoleMessage(self, level, message, line_num, source_id):  # acts only on '<token> <json>' messages, all others are dropped
+        parts = message.split(maxsplit=1)
+        if len(parts) == 2 and parts[0] == self.token:
+            msg = json.loads(parts[1])
+            t = msg.get('type')
+            if t == 'print':
+                print(msg['text'], file=sys.stderr)
+            elif t == 'domready':
+                if self.is_being_tested:
+                    self.print(f'domready: {self.is_current_url=}')
+                if self.is_current_url:
+                    self.current_fetch['working'] = False  # ends the wait loop in fetch()
+                    if not msg.get('failed'):
+                        self.current_fetch['html'] = msg['html']
+
+    def fetch(self, url_or_qurl, timeout=60):  # blocking: returns the HTML, raises TimeoutError at the deadline, ValueError if loading or capture failed
+        fetching_url = QUrl(url_or_qurl)
+        self.current_fetch = {
+            'timeout': timeout, 'end_time': time.monotonic() + timeout,
+            'fetching_url': canonicalize_qurl(fetching_url), 'working': True,
+            'load_started': False
+        }
+        self.load(fetching_url)
+        try:
+            app = QApplication.instance()
+            while self.current_fetch['working'] and time.monotonic() < self.current_fetch['end_time']:  # pump Qt events until a handler clears 'working' or the deadline passes
+                app.processEvents(QEventLoop.ProcessEventsFlag.ExcludeUserInputEvents)
+            ans = self.current_fetch.get('html')
+            if ans is None:
+                eurl = fetching_url.toString()
+                if self.current_fetch['working']:  # still working means the loop ended because the deadline passed
+                    raise TimeoutError(f'Timed out loading HTML from: {eurl}')
+                raise ValueError(f'Failed to load HTML from: {eurl}')
+            return ans
+        finally:
+            del self.current_fetch  # without current_fetch, is_current_url is False, so later signals are ignored