mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Print extra debug info when testing simple scraper
This commit is contained in:
parent
1c8296b710
commit
a0c0bd7af9
@ -56,25 +56,35 @@ class SimpleScraper(QWebEnginePage):
|
|||||||
def __init__(self, source, parent=None):
|
def __init__(self, source, parent=None):
|
||||||
profile = create_profile(source)
|
profile = create_profile(source)
|
||||||
self.token = profile.token
|
self.token = profile.token
|
||||||
|
self.is_being_tested = source == 'test'
|
||||||
super().__init__(profile, parent)
|
super().__init__(profile, parent)
|
||||||
self.setAudioMuted(True)
|
self.setAudioMuted(True)
|
||||||
self.loadStarted.connect(self.load_started)
|
self.loadStarted.connect(self.load_started)
|
||||||
self.loadFinished.connect(self.load_finished)
|
self.loadFinished.connect(self.load_finished)
|
||||||
self.loadProgress.connect(self.load_progress)
|
self.loadProgress.connect(self.load_progress)
|
||||||
|
|
||||||
|
def print(self, *a):
|
||||||
|
print(*a, file=sys.stderr)
|
||||||
|
|
||||||
def load_started(self):
|
def load_started(self):
|
||||||
if hasattr(self, 'current_fetch'):
|
if self.is_being_tested:
|
||||||
|
self.print(f'load_started: {self.is_current_url=} {self.requestedUrl()=}')
|
||||||
|
if self.is_current_url:
|
||||||
self.current_fetch['load_started'] = True
|
self.current_fetch['load_started'] = True
|
||||||
|
|
||||||
def load_finished(self, ok):
|
def load_finished(self, ok):
|
||||||
if hasattr(self, 'current_fetch'):
|
if self.is_being_tested:
|
||||||
|
self.print(f'load_finished: {ok=} {self.is_current_url=}')
|
||||||
|
if self.is_current_url:
|
||||||
self.current_fetch['load_finished'] = True
|
self.current_fetch['load_finished'] = True
|
||||||
self.current_fetch['load_was_ok'] = ok
|
self.current_fetch['load_was_ok'] = ok
|
||||||
if not ok and self.is_current_url:
|
if not ok and self.is_current_url:
|
||||||
self.current_fetch['working'] = False
|
self.current_fetch['working'] = False
|
||||||
|
|
||||||
def load_progress(self, progress):
|
def load_progress(self, progress):
|
||||||
if hasattr(self, 'current_fetch'):
|
if self.is_being_tested:
|
||||||
|
self.print(f'load_progress: {progress=} {self.is_current_url=}')
|
||||||
|
if self.is_current_url:
|
||||||
self.current_fetch['end_time'] = time.monotonic() + self.current_fetch['timeout']
|
self.current_fetch['end_time'] = time.monotonic() + self.current_fetch['timeout']
|
||||||
|
|
||||||
def javaScriptAlert(self, url, msg):
|
def javaScriptAlert(self, url, msg):
|
||||||
@ -90,7 +100,7 @@ class SimpleScraper(QWebEnginePage):
|
|||||||
def is_current_url(self):
|
def is_current_url(self):
|
||||||
if not hasattr(self, 'current_fetch'):
|
if not hasattr(self, 'current_fetch'):
|
||||||
return False
|
return False
|
||||||
return canonicalize_qurl(self.url()) == self.current_fetch['fetching_url']
|
return canonicalize_qurl(self.requestedUrl()) == self.current_fetch['fetching_url']
|
||||||
|
|
||||||
def javaScriptConsoleMessage(self, level, message, line_num, source_id):
|
def javaScriptConsoleMessage(self, level, message, line_num, source_id):
|
||||||
parts = message.split(maxsplit=1)
|
parts = message.split(maxsplit=1)
|
||||||
@ -100,6 +110,8 @@ class SimpleScraper(QWebEnginePage):
|
|||||||
if t == 'print':
|
if t == 'print':
|
||||||
print(msg['text'], file=sys.stderr)
|
print(msg['text'], file=sys.stderr)
|
||||||
elif t == 'domready':
|
elif t == 'domready':
|
||||||
|
if self.is_being_tested:
|
||||||
|
self.print(f'domready: {self.is_current_url=}')
|
||||||
if self.is_current_url:
|
if self.is_current_url:
|
||||||
self.current_fetch['working'] = False
|
self.current_fetch['working'] = False
|
||||||
if not msg.get('failed'):
|
if not msg.get('failed'):
|
||||||
@ -137,6 +149,8 @@ qt.webenginecontext.info=false
|
|||||||
s = SimpleScraper(source)
|
s = SimpleScraper(source)
|
||||||
for line in sys.stdin.buffer:
|
for line in sys.stdin.buffer:
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
|
if source == 'test':
|
||||||
|
print(line.decode('utf-8'), file=sys.stderr)
|
||||||
try:
|
try:
|
||||||
cmd, rest = line.split(b':', 1)
|
cmd, rest = line.split(b':', 1)
|
||||||
except Exception:
|
except Exception:
|
||||||
@ -225,11 +239,3 @@ def find_tests():
|
|||||||
self.assertFalse(w)
|
self.assertFalse(w)
|
||||||
|
|
||||||
return unittest.defaultTestLoader.loadTestsFromTestCase(TestSimpleWebEngineScraper)
|
return unittest.defaultTestLoader.loadTestsFromTestCase(TestSimpleWebEngineScraper)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
app = QApplication([])
|
|
||||||
s = SimpleScraper('test')
|
|
||||||
s.fetch('file:///t/raw.html', timeout=5)
|
|
||||||
del s
|
|
||||||
del app
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user