From 0ad7974972c5ec8e2ff64655dbdc10316f1d0ec2 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Thu, 23 Apr 2026 10:11:12 -0700 Subject: [PATCH] Updates various small typing things --- .../management/commands/document_exporter.py | 5 +-- src/documents/permissions.py | 2 +- .../management/test_management_base_cmd.py | 3 +- src/documents/tests/test_workflows.py | 10 +++--- src/paperless/checks.py | 6 ++-- src/paperless/config.py | 13 ++++--- .../tests/parsers/test_parse_modes.py | 20 ++++++----- .../tests/parsers/test_tesseract_parser.py | 35 ++++++++++--------- 8 files changed, 50 insertions(+), 44 deletions(-) diff --git a/src/documents/management/commands/document_exporter.py b/src/documents/management/commands/document_exporter.py index 562a2ca8d..efb19d587 100644 --- a/src/documents/management/commands/document_exporter.py +++ b/src/documents/management/commands/document_exporter.py @@ -6,6 +6,7 @@ import tempfile from itertools import islice from pathlib import Path from typing import TYPE_CHECKING +from typing import Any from allauth.mfa.models import Authenticator from allauth.socialaccount.models import SocialAccount @@ -68,7 +69,7 @@ from paperless_mail.models import MailRule def serialize_queryset_batched( - queryset: "QuerySet", + queryset: "QuerySet[Any]", *, batch_size: int = 500, ) -> "Generator[list[dict], None, None]": @@ -364,7 +365,7 @@ class Command(CryptMixin, PaperlessCommand): # 2. Create manifest, containing all correspondents, types, tags, storage paths # note, documents and ui_settings - manifest_key_to_object_query: dict[str, QuerySet] = { + manifest_key_to_object_query: dict[str, QuerySet[Any]] = { "correspondents": Correspondent.objects.all(), "tags": Tag.objects.all(), "document_types": DocumentType.objects.all(), diff --git a/src/documents/permissions.py b/src/documents/permissions.py index 99e1425de..2faf36b18 100644 --- a/src/documents/permissions.py +++ b/src/documents/permissions.py @@ -261,7 +261,7 @@ def get_objects_for_user_owner_aware( Model: Any, *, include_deleted: bool = False, -) -> QuerySet: +) -> QuerySet[Any]: """ Returns objects the user owns, are unowned, or has explicit perms. When include_deleted is True, soft-deleted items are also included. diff --git a/src/documents/tests/management/test_management_base_cmd.py b/src/documents/tests/management/test_management_base_cmd.py index d4c99f151..5ce59aa85 100644 --- a/src/documents/tests/management/test_management_base_cmd.py +++ b/src/documents/tests/management/test_management_base_cmd.py @@ -4,6 +4,7 @@ from __future__ import annotations import io from typing import TYPE_CHECKING +from typing import Any import pytest from django.core.management import CommandError @@ -122,7 +123,7 @@ def mock_queryset(): This verifies we use .count() instead of len() for querysets. """ - class MockQuerySet(QuerySet): + class MockQuerySet(QuerySet[Any]): def __init__(self, items: list): self._items = items self.count_called = False diff --git a/src/documents/tests/test_workflows.py b/src/documents/tests/test_workflows.py index 87cc54779..538a70255 100644 --- a/src/documents/tests/test_workflows.py +++ b/src/documents/tests/test_workflows.py @@ -2064,11 +2064,11 @@ class TestWorkflows( format="json", ) - view_users_perms: QuerySet = get_users_with_perms( + view_users_perms: QuerySet[Any] = get_users_with_perms( doc, only_with_perms_in=["view_document"], ) - change_users_perms: QuerySet = get_users_with_perms( + change_users_perms: QuerySet[Any] = get_users_with_perms( doc, only_with_perms_in=["change_document"], ) @@ -2079,7 +2079,7 @@ class TestWorkflows( self.assertIn(self.user3, view_users_perms) self.assertIn(self.user3, change_users_perms) - group_perms: QuerySet = get_groups_with_perms(doc) + group_perms: QuerySet[Any] = get_groups_with_perms(doc) # group1 should still have permissions self.assertIn(self.group1, group_perms) # group2 should have been added @@ -2845,7 +2845,7 @@ class TestWorkflows( self.assertEqual(doc.custom_fields.all().count(), 0) self.assertFalse(self.user3.has_perm("documents.view_document", doc)) self.assertFalse(self.user3.has_perm("documents.change_document", doc)) - group_perms: QuerySet = get_groups_with_perms(doc) + group_perms: QuerySet[Any] = get_groups_with_perms(doc) self.assertNotIn(self.group1, group_perms) def test_removal_action_document_updated_removeall(self) -> None: @@ -2916,7 +2916,7 @@ class TestWorkflows( self.assertEqual(doc.custom_fields.all().count(), 0) self.assertFalse(self.user3.has_perm("documents.view_document", doc)) self.assertFalse(self.user3.has_perm("documents.change_document", doc)) - group_perms: QuerySet = get_groups_with_perms(doc) + group_perms: QuerySet[Any] = get_groups_with_perms(doc) self.assertNotIn(self.group1, group_perms) def test_removal_action_document_consumed(self) -> None: diff --git a/src/paperless/checks.py b/src/paperless/checks.py index fbcae320a..adf8b9f07 100644 --- a/src/paperless/checks.py +++ b/src/paperless/checks.py @@ -1,6 +1,4 @@ -import grp import os -import pwd import shutil import stat import subprocess @@ -38,8 +36,8 @@ def path_check(var: str, directory: Path) -> list[Error]: except PermissionError: dir_stat: os.stat_result = Path(directory).stat() dir_mode: str = stat.filemode(dir_stat.st_mode) - dir_owner: str = pwd.getpwuid(dir_stat.st_uid).pw_name - dir_group: str = grp.getgrgid(dir_stat.st_gid).gr_name + dir_owner: str = "" + dir_group: str = "" messages.append( Error( writeable_message.format(var), diff --git a/src/paperless/config.py b/src/paperless/config.py index 8363cfb1f..5204b677b 100644 --- a/src/paperless/config.py +++ b/src/paperless/config.py @@ -38,7 +38,9 @@ class OutputTypeConfig(BaseConfig): def __post_init__(self) -> None: app_config = self._get_config_instance() - self.output_type = app_config.output_type or settings.OCR_OUTPUT_TYPE + self.output_type = app_config.output_type or OutputTypeChoices( + settings.OCR_OUTPUT_TYPE, + ) @dataclasses.dataclass @@ -70,12 +72,13 @@ class OcrConfig(OutputTypeConfig): self.pages = app_config.pages or settings.OCR_PAGES self.language = app_config.language or settings.OCR_LANGUAGE - self.mode = app_config.mode or settings.OCR_MODE + self.mode = app_config.mode or ModeChoices(settings.OCR_MODE) self.archive_file_generation = ( - app_config.archive_file_generation or settings.ARCHIVE_FILE_GENERATION + app_config.archive_file_generation + or ArchiveFileGenerationChoices(settings.ARCHIVE_FILE_GENERATION) ) self.image_dpi = app_config.image_dpi or settings.OCR_IMAGE_DPI - self.clean = app_config.unpaper_clean or settings.OCR_CLEAN + self.clean = app_config.unpaper_clean or CleanChoices(settings.OCR_CLEAN) self.deskew = ( app_config.deskew if app_config.deskew is not None else settings.OCR_DESKEW ) @@ -92,7 +95,7 @@ class OcrConfig(OutputTypeConfig): ) self.color_conversion_strategy = ( app_config.color_conversion_strategy - or settings.OCR_COLOR_CONVERSION_STRATEGY + or ColorConvertChoices(settings.OCR_COLOR_CONVERSION_STRATEGY) ) user_args = None diff --git a/src/paperless/tests/parsers/test_parse_modes.py b/src/paperless/tests/parsers/test_parse_modes.py index bf95cdaf8..1c1f4bd48 100644 --- a/src/paperless/tests/parsers/test_parse_modes.py +++ b/src/paperless/tests/parsers/test_parse_modes.py @@ -16,6 +16,8 @@ from typing import TYPE_CHECKING import pytest +from paperless.models import ModeChoices + if TYPE_CHECKING: from pytest_mock import MockerFixture @@ -72,7 +74,7 @@ class TestAutoModeWithText: ) mock_ocr = mocker.patch("ocrmypdf.ocr") - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( simple_digital_pdf_file, "application/pdf", @@ -106,7 +108,7 @@ class TestAutoModeWithText: ) mock_ocr = mocker.patch("ocrmypdf.ocr") - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( simple_digital_pdf_file, "application/pdf", @@ -158,7 +160,7 @@ class TestAutoModeNoText: mocker.patch.object(tesseract_parser, "extract_text", side_effect=_extract_side) mock_ocr = mocker.patch("ocrmypdf.ocr") - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( multi_page_images_pdf_file, "application/pdf", @@ -200,7 +202,7 @@ class TestAutoModeNoText: mocker.patch.object(tesseract_parser, "extract_text", side_effect=_extract_side) mock_ocr = mocker.patch("ocrmypdf.ocr") - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( multi_page_images_pdf_file, "application/pdf", @@ -243,7 +245,7 @@ class TestOffModePdf: ) mock_ocr = mocker.patch("ocrmypdf.ocr") - tesseract_parser.settings.mode = "off" + tesseract_parser.settings.mode = ModeChoices.OFF tesseract_parser.parse( simple_digital_pdf_file, "application/pdf", @@ -283,7 +285,7 @@ class TestOffModePdf: ) mocker.patch("ocrmypdf.pdfa.generate_pdfa_ps") - tesseract_parser.settings.mode = "off" + tesseract_parser.settings.mode = ModeChoices.OFF tesseract_parser.parse( simple_digital_pdf_file, "application/pdf", @@ -323,7 +325,7 @@ class TestOffModeImage: """ mock_ocr = mocker.patch("ocrmypdf.ocr") - tesseract_parser.settings.mode = "off" + tesseract_parser.settings.mode = ModeChoices.OFF tesseract_parser.parse(simple_png_file, "image/png", produce_archive=False) mock_ocr.assert_not_called() @@ -355,7 +357,7 @@ class TestOffModeImage: ) mock_ocr = mocker.patch("ocrmypdf.ocr") - tesseract_parser.settings.mode = "off" + tesseract_parser.settings.mode = ModeChoices.OFF tesseract_parser.parse(simple_png_file, "image/png", produce_archive=True) mock_convert.assert_called_once_with(simple_png_file) @@ -429,7 +431,7 @@ class TestProduceArchiveFalse: ) mock_ocr = mocker.patch("ocrmypdf.ocr") - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( simple_digital_pdf_file, "application/pdf", diff --git a/src/paperless/tests/parsers/test_tesseract_parser.py b/src/paperless/tests/parsers/test_tesseract_parser.py index bb8d986b4..32c2146b0 100644 --- a/src/paperless/tests/parsers/test_tesseract_parser.py +++ b/src/paperless/tests/parsers/test_tesseract_parser.py @@ -18,6 +18,7 @@ from ocrmypdf import SubprocessOutputError from documents.parsers import ParseError from documents.parsers import run_convert +from paperless.models import ModeChoices from paperless.parsers import ParserProtocol from paperless.parsers.tesseract import RasterisedDocumentParser from paperless.parsers.tesseract import post_process_text @@ -413,7 +414,7 @@ class TestParsePdf: tesseract_parser: RasterisedDocumentParser, tesseract_samples_dir: Path, ) -> None: - tesseract_parser.settings.mode = "redo" + tesseract_parser.settings.mode = ModeChoices.REDO tesseract_parser.parse( tesseract_samples_dir / "with-form.pdf", "application/pdf", @@ -430,7 +431,7 @@ class TestParsePdf: tesseract_parser: RasterisedDocumentParser, tesseract_samples_dir: Path, ) -> None: - tesseract_parser.settings.mode = "force" + tesseract_parser.settings.mode = ModeChoices.FORCE tesseract_parser.parse( tesseract_samples_dir / "with-form.pdf", "application/pdf", @@ -445,7 +446,7 @@ class TestParsePdf: tesseract_parser: RasterisedDocumentParser, tesseract_samples_dir: Path, ) -> None: - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse(tesseract_samples_dir / "signed.pdf", "application/pdf") assert tesseract_parser.archive_path is None assert_ordered_substrings( @@ -461,7 +462,7 @@ class TestParsePdf: tesseract_parser: RasterisedDocumentParser, tesseract_samples_dir: Path, ) -> None: - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( tesseract_samples_dir / "encrypted.pdf", "application/pdf", @@ -599,7 +600,7 @@ class TestParseMultiPage: tesseract_parser: RasterisedDocumentParser, tesseract_samples_dir: Path, ) -> None: - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( tesseract_samples_dir / "multi-page-images.pdf", "application/pdf", @@ -626,7 +627,7 @@ class TestParseMultiPage: - Pages 1 and 2 extracted; page 3 absent """ tesseract_parser.settings.pages = 2 - tesseract_parser.settings.mode = "redo" + tesseract_parser.settings.mode = ModeChoices.REDO tesseract_parser.parse( tesseract_samples_dir / "multi-page-images.pdf", "application/pdf", @@ -652,7 +653,7 @@ class TestParseMultiPage: - Only page 1 extracted """ tesseract_parser.settings.pages = 1 - tesseract_parser.settings.mode = "force" + tesseract_parser.settings.mode = ModeChoices.FORCE tesseract_parser.parse( tesseract_samples_dir / "multi-page-images.pdf", "application/pdf", @@ -754,7 +755,7 @@ class TestSkipArchive: - Text extracted from original; no archive created (text exists + produce_archive=False skips OCRmyPDF entirely) """ - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( tesseract_samples_dir / "multi-page-digital.pdf", "application/pdf", @@ -780,7 +781,7 @@ class TestSkipArchive: THEN: - Text extracted; archive created (OCR needed, no existing text) """ - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( tesseract_samples_dir / "multi-page-images.pdf", "application/pdf", @@ -838,7 +839,7 @@ class TestSkipArchive: - archive_path is set if and only if produce_archive=True - Text is always extracted """ - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( tesseract_samples_dir / filename, "application/pdf", @@ -868,7 +869,7 @@ class TestSkipArchive: - Text is extracted from the original via pdftotext - No archive is produced """ - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO mock_ocr = mocker.patch("ocrmypdf.ocr") tesseract_parser.parse( tesseract_samples_dir / "simple-digital.pdf", @@ -895,7 +896,7 @@ class TestSkipArchive: - Archive is produced - Text is preserved from the original """ - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( tesseract_samples_dir / "simple-digital.pdf", "application/pdf", @@ -925,7 +926,7 @@ class TestParseMixed: THEN: - All pages extracted; archive created; sidecar notes skipped pages """ - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( tesseract_samples_dir / "multi-page-mixed.pdf", "application/pdf", @@ -953,7 +954,7 @@ class TestParseMixed: THEN: - Both text layer and image text extracted; archive created """ - tesseract_parser.settings.mode = "redo" + tesseract_parser.settings.mode = ModeChoices.REDO tesseract_parser.parse( tesseract_samples_dir / "single-page-mixed.pdf", "application/pdf", @@ -989,7 +990,7 @@ class TestParseMixed: THEN: - No archive created (produce_archive=False); text from text layer present """ - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.parse( tesseract_samples_dir / "multi-page-mixed.pdf", "application/pdf", @@ -1013,7 +1014,7 @@ class TestParseRotate: tesseract_parser: RasterisedDocumentParser, tesseract_samples_dir: Path, ) -> None: - tesseract_parser.settings.mode = "auto" + tesseract_parser.settings.mode = ModeChoices.AUTO tesseract_parser.settings.rotate = True tesseract_parser.parse(tesseract_samples_dir / "rotated.pdf", "application/pdf") assert_ordered_substrings( @@ -1052,7 +1053,7 @@ class TestParseRtl: force-ocr with English Tesseract (producing garbage). Using mode="off" forces skip_text=True so the Arabic text layer is preserved through PDF/A conversion. """ - tesseract_parser.settings.mode = "off" + tesseract_parser.settings.mode = ModeChoices.OFF tesseract_parser.parse( tesseract_samples_dir / "rtl-test.pdf", "application/pdf",