This commit is contained in:
shamoon 2025-04-18 13:08:10 -07:00
parent 18e77fabf5
commit 170654cc3c
No known key found for this signature in database

View File

@@ -1,9 +1,7 @@
import sys
import uuid import uuid
from pathlib import Path from pathlib import Path
from unittest import mock from unittest import mock
import pytest
from django.test import TestCase from django.test import TestCase
from django.test import override_settings from django.test import override_settings
@@ -25,59 +23,37 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
self.fail(f"'{s}' is not in '{content}'") self.fail(f"'{s}' is not in '{content}'")
self.assertListEqual(indices, sorted(indices)) self.assertListEqual(indices, sorted(indices))
@pytest.mark.skipif( @mock.patch("paperless_remote.parsers.subprocess.run")
sys.version_info > (3, 10), @mock.patch("azure.ai.documentintelligence.DocumentIntelligenceClient")
reason="Fails on 3.11 only on CI, for some reason", def test_get_text_with_azure(self, mock_client_cls, mock_subprocess):
) # TODO: investigate # Arrange mock Azure client
@mock.patch("azure.ai.formrecognizer.DocumentAnalysisClient") mock_client = mock.Mock()
def test_get_text_with_azure(self, mock_azure_client): mock_client_cls.return_value = mock_client
result = mock.Mock()
result.content = "This is a test document." # Simulate poller result and its `.details`
result.pages = [ mock_poller = mock.Mock()
mock.Mock( mock_poller.wait.return_value = None
width=100, mock_poller.details = {"operation_id": "fake-op-id"}
height=100, mock_client.begin_analyze_document.return_value = mock_poller
words=[
mock.Mock( # Return dummy PDF bytes
content="This", mock_client.get_analyze_result_pdf.return_value = [
polygon=[ b"%PDF-",
mock.Mock(x=0, y=0), b"1.7 ",
], b"FAKEPDF",
),
mock.Mock(
content="is",
polygon=[
mock.Mock(x=10, y=10),
],
),
mock.Mock(
content="a",
polygon=[
mock.Mock(x=20, y=20),
],
),
mock.Mock(
content="test",
polygon=[
mock.Mock(x=30, y=30),
],
),
mock.Mock(
content="document.",
polygon=[
mock.Mock(x=40, y=40),
],
),
],
),
] ]
mock_azure_client.return_value.begin_analyze_document.return_value.result.return_value = result # Simulate pdftotext by writing dummy text to sidecar file
def fake_run(cmd, *args, **kwargs):
with Path(cmd[-1]).open("w", encoding="utf-8") as f:
f.write("This is a test document.")
mock_subprocess.side_effect = fake_run
with override_settings( with override_settings(
REMOTE_OCR_ENGINE="azureai", REMOTE_OCR_ENGINE="azureai",
REMOTE_OCR_API_KEY="somekey", REMOTE_OCR_API_KEY="somekey",
REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com/", REMOTE_OCR_ENDPOINT="https://endpoint.cognitiveservices.azure.com",
): ):
parser = RemoteDocumentParser(uuid.uuid4()) parser = RemoteDocumentParser(uuid.uuid4())
parser.parse( parser.parse(