import shutil
from datetime import timedelta
from pathlib import Path
from unittest import mock

from celery import states
from django.conf import settings
from django.test import TestCase
from django.test import override_settings
from django.utils import timezone

from documents import tasks
from documents.models import Correspondent
from documents.models import Document
from documents.models import DocumentType
from documents.models import PaperlessTask
from documents.models import Tag
from documents.sanity_checker import SanityCheckFailedException
from documents.sanity_checker import SanityCheckMessages
from documents.tests.test_classifier import dummy_preprocess
from documents.tests.utils import DirectoriesMixin
from documents.tests.utils import FileSystemAssertsMixin


class TestIndexReindex(DirectoriesMixin, TestCase):
    """Smoke tests for the search index maintenance tasks."""

    def test_index_reindex(self):
        """index_reindex completes without error when a document exists."""
        Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
            added=timezone.now(),
            created=timezone.now(),
            modified=timezone.now(),
        )

        tasks.index_reindex()

    def test_index_optimize(self):
        """index_optimize completes without error when a document exists."""
        Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
            added=timezone.now(),
            created=timezone.now(),
            modified=timezone.now(),
        )

        tasks.index_optimize()


class TestClassifier(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    """Tests for the train_classifier task."""

    @mock.patch("documents.tasks.load_classifier")
    def test_train_classifier_no_auto_matching(self, load_classifier):
        """With no auto-matching objects, the classifier is never loaded."""
        tasks.train_classifier()
        load_classifier.assert_not_called()

    @mock.patch("documents.tasks.load_classifier")
    def test_train_classifier_with_auto_tag(self, load_classifier):
        """An auto-matching tag triggers a (failed) classifier load; no model
        file is written because there is no trainable data."""
        load_classifier.return_value = None
        Tag.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
        tasks.train_classifier()
        load_classifier.assert_called_once()
        self.assertIsNotFile(settings.MODEL_FILE)

    @mock.patch("documents.tasks.load_classifier")
    def test_train_classifier_with_auto_type(self, load_classifier):
        """Same as above, for an auto-matching document type.

        NOTE: MATCH_AUTO is defined on the shared matching base model, so
        Tag.MATCH_AUTO is valid for DocumentType as well.
        """
        load_classifier.return_value = None
        DocumentType.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
        tasks.train_classifier()
        load_classifier.assert_called_once()
        self.assertIsNotFile(settings.MODEL_FILE)

    @mock.patch("documents.tasks.load_classifier")
    def test_train_classifier_with_auto_correspondent(self, load_classifier):
        """Same as above, for an auto-matching correspondent."""
        load_classifier.return_value = None
        Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
        tasks.train_classifier()
        load_classifier.assert_called_once()
        self.assertIsNotFile(settings.MODEL_FILE)

    def test_train_classifier(self):
        """The model file is written on first training, left untouched when
        nothing changed, and rewritten after the training data changes."""
        c = Correspondent.objects.create(matching_algorithm=Tag.MATCH_AUTO, name="test")
        doc = Document.objects.create(correspondent=c, content="test", title="test")

        self.assertIsNotFile(settings.MODEL_FILE)
        with mock.patch(
            "documents.classifier.DocumentClassifier.preprocess_content",
        ) as pre_proc_mock:
            pre_proc_mock.side_effect = dummy_preprocess

            tasks.train_classifier()
            self.assertIsFile(settings.MODEL_FILE)
            mtime = Path(settings.MODEL_FILE).stat().st_mtime

            # Training again with unchanged data must not rewrite the model.
            tasks.train_classifier()
            self.assertIsFile(settings.MODEL_FILE)
            mtime2 = Path(settings.MODEL_FILE).stat().st_mtime
            self.assertEqual(mtime, mtime2)

            doc.content = "test2"
            doc.save()

            # Changed training data: the model file must be rewritten.
            tasks.train_classifier()
            self.assertIsFile(settings.MODEL_FILE)
            mtime3 = Path(settings.MODEL_FILE).stat().st_mtime
            self.assertNotEqual(mtime2, mtime3)


class TestSanityCheck(DirectoriesMixin, TestCase):
    """Tests for the sanity_check task's result/raise behavior per severity."""

    @mock.patch("documents.tasks.sanity_checker.check_sanity")
    def test_sanity_check_success(self, m):
        m.return_value = SanityCheckMessages()
        self.assertEqual(tasks.sanity_check(), "No issues detected.")
        m.assert_called_once()

    @mock.patch("documents.tasks.sanity_checker.check_sanity")
    def test_sanity_check_error(self, m):
        """Errors raise SanityCheckFailedException by default."""
        messages = SanityCheckMessages()
        messages.error(None, "Some error")
        m.return_value = messages
        self.assertRaises(SanityCheckFailedException, tasks.sanity_check)
        m.assert_called_once()

    @mock.patch("documents.tasks.sanity_checker.check_sanity")
    def test_sanity_check_error_no_raise(self, m):
        """With raise_on_error=False, errors are reported, not raised."""
        messages = SanityCheckMessages()
        messages.error(None, "Some error")
        m.return_value = messages
        # No exception should be raised
        result = tasks.sanity_check(raise_on_error=False)
        self.assertEqual(
            result,
            "Sanity check exited with errors. See log.",
        )
        m.assert_called_once()

    @mock.patch("documents.tasks.sanity_checker.check_sanity")
    def test_sanity_check_warning(self, m):
        messages = SanityCheckMessages()
        messages.warning(None, "Some warning")
        m.return_value = messages
        self.assertEqual(
            tasks.sanity_check(),
            "Sanity check exited with warnings. See log.",
        )
        m.assert_called_once()

    @mock.patch("documents.tasks.sanity_checker.check_sanity")
    def test_sanity_check_info(self, m):
        messages = SanityCheckMessages()
        messages.info(None, "Some info")
        m.return_value = messages
        self.assertEqual(
            tasks.sanity_check(),
            "Sanity check exited with infos. See log.",
        )
        m.assert_called_once()


class TestBulkUpdate(DirectoriesMixin, TestCase):
    def test_bulk_update_documents(self):
        """bulk_update_documents completes without error for a valid pk."""
        doc1 = Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
            added=timezone.now(),
            created=timezone.now(),
            modified=timezone.now(),
        )

        tasks.bulk_update_documents([doc1.pk])


class TestEmptyTrashTask(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    """
    GIVEN:
        - Existing document in trash
    WHEN:
        - Empty trash task is called without doc_ids
    THEN:
        - Document is only deleted if it has been in trash for more than delay (default 30 days)
    """

    def test_empty_trash(self):
        doc = Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
            added=timezone.now(),
            created=timezone.now(),
            modified=timezone.now(),
        )

        # Soft delete: gone from the default manager, still in global_objects.
        doc.delete()
        self.assertEqual(Document.global_objects.count(), 1)
        self.assertEqual(Document.objects.count(), 0)

        # Freshly trashed, so the task must not purge it yet.
        tasks.empty_trash()
        self.assertEqual(Document.global_objects.count(), 1)

        # Backdate the deletion beyond the 30-day delay; now it is purged.
        doc.deleted_at = timezone.now() - timedelta(days=31)
        doc.save()
        tasks.empty_trash()
        self.assertEqual(Document.global_objects.count(), 0)


class TestUpdateContent(DirectoriesMixin, TestCase):
    def test_update_content_maybe_archive_file(self):
        """
        GIVEN:
            - Existing document with archive file
        WHEN:
            - Update content task is called
        THEN:
            - Document is reprocessed, content and checksum are updated
        """
        sample1 = self.dirs.scratch_dir / "sample.pdf"
        shutil.copy(
            Path(__file__).parent / "samples" / "documents" / "originals" / "0000001.pdf",
            sample1,
        )
        sample1_archive = self.dirs.archive_dir / "sample_archive.pdf"
        shutil.copy(
            Path(__file__).parent / "samples" / "documents" / "originals" / "0000001.pdf",
            sample1_archive,
        )

        doc = Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
            archive_checksum="wow",
            filename=sample1,
            mime_type="application/pdf",
            archive_filename=sample1_archive,
        )

        tasks.update_document_content_maybe_archive_file(doc.pk)

        # The document was created with content="my document"; comparing
        # against the original content proves the task actually replaced it
        # with text parsed from the sample PDF. (Comparing to "test" — the
        # title — would pass even if nothing was updated.)
        self.assertNotEqual(Document.objects.get(pk=doc.pk).content, "my document")
        self.assertNotEqual(Document.objects.get(pk=doc.pk).archive_checksum, "wow")

    def test_update_content_maybe_archive_file_no_archive(self):
        """
        GIVEN:
            - Existing document without archive file
        WHEN:
            - Update content task is called
        THEN:
            - Document is reprocessed, content is updated
        """
        sample1 = self.dirs.scratch_dir / "sample.pdf"
        shutil.copy(
            Path(__file__).parent / "samples" / "documents" / "originals" / "0000001.pdf",
            sample1,
        )

        doc = Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
            filename=sample1,
            mime_type="application/pdf",
        )

        tasks.update_document_content_maybe_archive_file(doc.pk)

        # As above: compare against the original content, not the title.
        self.assertNotEqual(Document.objects.get(pk=doc.pk).content, "my document")


class TestAIIndex(DirectoriesMixin, TestCase):
    """Tests for the LLM index tasks."""

    @override_settings(
        AI_ENABLED=True,
        LLM_EMBEDDING_BACKEND="huggingface",
    )
    def test_ai_index_success(self):
        """
        GIVEN:
            - Document exists, AI is enabled, llm index backend is set
        WHEN:
            - llmindex_index task is called
        THEN:
            - update_llm_index is called, and the task is marked as success
        """
        Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
        )
        # lazy-loaded so mock the actual function
        with mock.patch("paperless_ai.indexing.update_llm_index") as update_llm_index:
            update_llm_index.return_value = "LLM index updated successfully."
            tasks.llmindex_index()
            update_llm_index.assert_called_once()
            task = PaperlessTask.objects.get(
                task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
            )
            self.assertEqual(task.status, states.SUCCESS)
            self.assertEqual(task.result, "LLM index updated successfully.")

    @override_settings(
        AI_ENABLED=True,
        LLM_EMBEDDING_BACKEND="huggingface",
    )
    def test_ai_index_failure(self):
        """
        GIVEN:
            - Document exists, AI is enabled, llm index backend is set
        WHEN:
            - llmindex_index task is called
        THEN:
            - update_llm_index raises an exception, and the task is marked as failure
        """
        Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
        )
        # lazy-loaded so mock the actual function
        with mock.patch("paperless_ai.indexing.update_llm_index") as update_llm_index:
            update_llm_index.side_effect = Exception("LLM index update failed.")
            tasks.llmindex_index()
            update_llm_index.assert_called_once()
            task = PaperlessTask.objects.get(
                task_name=PaperlessTask.TaskName.LLMINDEX_UPDATE,
            )
            self.assertEqual(task.status, states.FAILURE)
            self.assertIn("LLM index update failed.", task.result)

    def test_update_document_in_llm_index(self):
        """
        GIVEN:
            - Nothing
        WHEN:
            - update_document_in_llm_index task is called
        THEN:
            - llm_index_add_or_update_document is called
        """
        doc = Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
        )
        with mock.patch(
            "documents.tasks.llm_index_add_or_update_document",
        ) as llm_index_add_or_update_document:
            tasks.update_document_in_llm_index(doc)
            llm_index_add_or_update_document.assert_called_once_with(doc)

    def test_remove_document_from_llm_index(self):
        """
        GIVEN:
            - Nothing
        WHEN:
            - remove_document_from_llm_index task is called
        THEN:
            - llm_index_remove_document is called
        """
        doc = Document.objects.create(
            title="test",
            content="my document",
            checksum="wow",
        )
        with mock.patch(
            "documents.tasks.llm_index_remove_document",
        ) as llm_index_remove_document:
            tasks.remove_document_from_llm_index(doc)
            llm_index_remove_document.assert_called_once_with(doc)