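Updates various small typing details: parameterizes QuerySet annotations as QuerySet[Any] and uses the OCR choice enums (e.g. ModeChoices) instead of raw strings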

This commit is contained in:
Trenton H 2026-04-23 10:11:12 -07:00
parent 1a3b56496a
commit 0ad7974972
8 changed files with 50 additions and 44 deletions

View File

@ -6,6 +6,7 @@ import tempfile
from itertools import islice
from pathlib import Path
from typing import TYPE_CHECKING
from typing import Any
from allauth.mfa.models import Authenticator
from allauth.socialaccount.models import SocialAccount
@ -68,7 +69,7 @@ from paperless_mail.models import MailRule
def serialize_queryset_batched(
queryset: "QuerySet",
queryset: "QuerySet[Any]",
*,
batch_size: int = 500,
) -> "Generator[list[dict], None, None]":
@ -364,7 +365,7 @@ class Command(CryptMixin, PaperlessCommand):
# 2. Create manifest, containing all correspondents, types, tags, storage paths
# note, documents and ui_settings
manifest_key_to_object_query: dict[str, QuerySet] = {
manifest_key_to_object_query: dict[str, QuerySet[Any]] = {
"correspondents": Correspondent.objects.all(),
"tags": Tag.objects.all(),
"document_types": DocumentType.objects.all(),

View File

@ -261,7 +261,7 @@ def get_objects_for_user_owner_aware(
Model: Any,
*,
include_deleted: bool = False,
) -> QuerySet:
) -> QuerySet[Any]:
"""
Returns objects that the user owns, that are unowned, or for which the user has explicit perms.
When include_deleted is True, soft-deleted items are also included.

View File

@ -4,6 +4,7 @@ from __future__ import annotations
import io
from typing import TYPE_CHECKING
from typing import Any
import pytest
from django.core.management import CommandError
@ -122,7 +123,7 @@ def mock_queryset():
This verifies we use .count() instead of len() for querysets.
"""
class MockQuerySet(QuerySet):
class MockQuerySet(QuerySet[Any]):
def __init__(self, items: list):
self._items = items
self.count_called = False

View File

@ -2064,11 +2064,11 @@ class TestWorkflows(
format="json",
)
view_users_perms: QuerySet = get_users_with_perms(
view_users_perms: QuerySet[Any] = get_users_with_perms(
doc,
only_with_perms_in=["view_document"],
)
change_users_perms: QuerySet = get_users_with_perms(
change_users_perms: QuerySet[Any] = get_users_with_perms(
doc,
only_with_perms_in=["change_document"],
)
@ -2079,7 +2079,7 @@ class TestWorkflows(
self.assertIn(self.user3, view_users_perms)
self.assertIn(self.user3, change_users_perms)
group_perms: QuerySet = get_groups_with_perms(doc)
group_perms: QuerySet[Any] = get_groups_with_perms(doc)
# group1 should still have permissions
self.assertIn(self.group1, group_perms)
# group2 should have been added
@ -2845,7 +2845,7 @@ class TestWorkflows(
self.assertEqual(doc.custom_fields.all().count(), 0)
self.assertFalse(self.user3.has_perm("documents.view_document", doc))
self.assertFalse(self.user3.has_perm("documents.change_document", doc))
group_perms: QuerySet = get_groups_with_perms(doc)
group_perms: QuerySet[Any] = get_groups_with_perms(doc)
self.assertNotIn(self.group1, group_perms)
def test_removal_action_document_updated_removeall(self) -> None:
@ -2916,7 +2916,7 @@ class TestWorkflows(
self.assertEqual(doc.custom_fields.all().count(), 0)
self.assertFalse(self.user3.has_perm("documents.view_document", doc))
self.assertFalse(self.user3.has_perm("documents.change_document", doc))
group_perms: QuerySet = get_groups_with_perms(doc)
group_perms: QuerySet[Any] = get_groups_with_perms(doc)
self.assertNotIn(self.group1, group_perms)
def test_removal_action_document_consumed(self) -> None:

View File

@ -1,6 +1,4 @@
import grp
import os
import pwd
import shutil
import stat
import subprocess
@ -38,8 +36,8 @@ def path_check(var: str, directory: Path) -> list[Error]:
except PermissionError:
dir_stat: os.stat_result = Path(directory).stat()
dir_mode: str = stat.filemode(dir_stat.st_mode)
dir_owner: str = pwd.getpwuid(dir_stat.st_uid).pw_name
dir_group: str = grp.getgrgid(dir_stat.st_gid).gr_name
dir_owner: str = ""
dir_group: str = ""
messages.append(
Error(
writeable_message.format(var),

View File

@ -38,7 +38,9 @@ class OutputTypeConfig(BaseConfig):
def __post_init__(self) -> None:
app_config = self._get_config_instance()
self.output_type = app_config.output_type or settings.OCR_OUTPUT_TYPE
self.output_type = app_config.output_type or OutputTypeChoices(
settings.OCR_OUTPUT_TYPE,
)
@dataclasses.dataclass
@ -70,12 +72,13 @@ class OcrConfig(OutputTypeConfig):
self.pages = app_config.pages or settings.OCR_PAGES
self.language = app_config.language or settings.OCR_LANGUAGE
self.mode = app_config.mode or settings.OCR_MODE
self.mode = app_config.mode or ModeChoices(settings.OCR_MODE)
self.archive_file_generation = (
app_config.archive_file_generation or settings.ARCHIVE_FILE_GENERATION
app_config.archive_file_generation
or ArchiveFileGenerationChoices(settings.ARCHIVE_FILE_GENERATION)
)
self.image_dpi = app_config.image_dpi or settings.OCR_IMAGE_DPI
self.clean = app_config.unpaper_clean or settings.OCR_CLEAN
self.clean = app_config.unpaper_clean or CleanChoices(settings.OCR_CLEAN)
self.deskew = (
app_config.deskew if app_config.deskew is not None else settings.OCR_DESKEW
)
@ -92,7 +95,7 @@ class OcrConfig(OutputTypeConfig):
)
self.color_conversion_strategy = (
app_config.color_conversion_strategy
or settings.OCR_COLOR_CONVERSION_STRATEGY
or ColorConvertChoices(settings.OCR_COLOR_CONVERSION_STRATEGY)
)
user_args = None

View File

@ -16,6 +16,8 @@ from typing import TYPE_CHECKING
import pytest
from paperless.models import ModeChoices
if TYPE_CHECKING:
from pytest_mock import MockerFixture
@ -72,7 +74,7 @@ class TestAutoModeWithText:
)
mock_ocr = mocker.patch("ocrmypdf.ocr")
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
simple_digital_pdf_file,
"application/pdf",
@ -106,7 +108,7 @@ class TestAutoModeWithText:
)
mock_ocr = mocker.patch("ocrmypdf.ocr")
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
simple_digital_pdf_file,
"application/pdf",
@ -158,7 +160,7 @@ class TestAutoModeNoText:
mocker.patch.object(tesseract_parser, "extract_text", side_effect=_extract_side)
mock_ocr = mocker.patch("ocrmypdf.ocr")
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
multi_page_images_pdf_file,
"application/pdf",
@ -200,7 +202,7 @@ class TestAutoModeNoText:
mocker.patch.object(tesseract_parser, "extract_text", side_effect=_extract_side)
mock_ocr = mocker.patch("ocrmypdf.ocr")
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
multi_page_images_pdf_file,
"application/pdf",
@ -243,7 +245,7 @@ class TestOffModePdf:
)
mock_ocr = mocker.patch("ocrmypdf.ocr")
tesseract_parser.settings.mode = "off"
tesseract_parser.settings.mode = ModeChoices.OFF
tesseract_parser.parse(
simple_digital_pdf_file,
"application/pdf",
@ -283,7 +285,7 @@ class TestOffModePdf:
)
mocker.patch("ocrmypdf.pdfa.generate_pdfa_ps")
tesseract_parser.settings.mode = "off"
tesseract_parser.settings.mode = ModeChoices.OFF
tesseract_parser.parse(
simple_digital_pdf_file,
"application/pdf",
@ -323,7 +325,7 @@ class TestOffModeImage:
"""
mock_ocr = mocker.patch("ocrmypdf.ocr")
tesseract_parser.settings.mode = "off"
tesseract_parser.settings.mode = ModeChoices.OFF
tesseract_parser.parse(simple_png_file, "image/png", produce_archive=False)
mock_ocr.assert_not_called()
@ -355,7 +357,7 @@ class TestOffModeImage:
)
mock_ocr = mocker.patch("ocrmypdf.ocr")
tesseract_parser.settings.mode = "off"
tesseract_parser.settings.mode = ModeChoices.OFF
tesseract_parser.parse(simple_png_file, "image/png", produce_archive=True)
mock_convert.assert_called_once_with(simple_png_file)
@ -429,7 +431,7 @@ class TestProduceArchiveFalse:
)
mock_ocr = mocker.patch("ocrmypdf.ocr")
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
simple_digital_pdf_file,
"application/pdf",

View File

@ -18,6 +18,7 @@ from ocrmypdf import SubprocessOutputError
from documents.parsers import ParseError
from documents.parsers import run_convert
from paperless.models import ModeChoices
from paperless.parsers import ParserProtocol
from paperless.parsers.tesseract import RasterisedDocumentParser
from paperless.parsers.tesseract import post_process_text
@ -413,7 +414,7 @@ class TestParsePdf:
tesseract_parser: RasterisedDocumentParser,
tesseract_samples_dir: Path,
) -> None:
tesseract_parser.settings.mode = "redo"
tesseract_parser.settings.mode = ModeChoices.REDO
tesseract_parser.parse(
tesseract_samples_dir / "with-form.pdf",
"application/pdf",
@ -430,7 +431,7 @@ class TestParsePdf:
tesseract_parser: RasterisedDocumentParser,
tesseract_samples_dir: Path,
) -> None:
tesseract_parser.settings.mode = "force"
tesseract_parser.settings.mode = ModeChoices.FORCE
tesseract_parser.parse(
tesseract_samples_dir / "with-form.pdf",
"application/pdf",
@ -445,7 +446,7 @@ class TestParsePdf:
tesseract_parser: RasterisedDocumentParser,
tesseract_samples_dir: Path,
) -> None:
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(tesseract_samples_dir / "signed.pdf", "application/pdf")
assert tesseract_parser.archive_path is None
assert_ordered_substrings(
@ -461,7 +462,7 @@ class TestParsePdf:
tesseract_parser: RasterisedDocumentParser,
tesseract_samples_dir: Path,
) -> None:
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
tesseract_samples_dir / "encrypted.pdf",
"application/pdf",
@ -599,7 +600,7 @@ class TestParseMultiPage:
tesseract_parser: RasterisedDocumentParser,
tesseract_samples_dir: Path,
) -> None:
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
tesseract_samples_dir / "multi-page-images.pdf",
"application/pdf",
@ -626,7 +627,7 @@ class TestParseMultiPage:
- Pages 1 and 2 extracted; page 3 absent
"""
tesseract_parser.settings.pages = 2
tesseract_parser.settings.mode = "redo"
tesseract_parser.settings.mode = ModeChoices.REDO
tesseract_parser.parse(
tesseract_samples_dir / "multi-page-images.pdf",
"application/pdf",
@ -652,7 +653,7 @@ class TestParseMultiPage:
- Only page 1 extracted
"""
tesseract_parser.settings.pages = 1
tesseract_parser.settings.mode = "force"
tesseract_parser.settings.mode = ModeChoices.FORCE
tesseract_parser.parse(
tesseract_samples_dir / "multi-page-images.pdf",
"application/pdf",
@ -754,7 +755,7 @@ class TestSkipArchive:
- Text extracted from original; no archive created (text exists +
produce_archive=False skips OCRmyPDF entirely)
"""
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
tesseract_samples_dir / "multi-page-digital.pdf",
"application/pdf",
@ -780,7 +781,7 @@ class TestSkipArchive:
THEN:
- Text extracted; archive created (OCR needed, no existing text)
"""
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
tesseract_samples_dir / "multi-page-images.pdf",
"application/pdf",
@ -838,7 +839,7 @@ class TestSkipArchive:
- archive_path is set if and only if produce_archive=True
- Text is always extracted
"""
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
tesseract_samples_dir / filename,
"application/pdf",
@ -868,7 +869,7 @@ class TestSkipArchive:
- Text is extracted from the original via pdftotext
- No archive is produced
"""
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
mock_ocr = mocker.patch("ocrmypdf.ocr")
tesseract_parser.parse(
tesseract_samples_dir / "simple-digital.pdf",
@ -895,7 +896,7 @@ class TestSkipArchive:
- Archive is produced
- Text is preserved from the original
"""
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
tesseract_samples_dir / "simple-digital.pdf",
"application/pdf",
@ -925,7 +926,7 @@ class TestParseMixed:
THEN:
- All pages extracted; archive created; sidecar notes skipped pages
"""
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
tesseract_samples_dir / "multi-page-mixed.pdf",
"application/pdf",
@ -953,7 +954,7 @@ class TestParseMixed:
THEN:
- Both text layer and image text extracted; archive created
"""
tesseract_parser.settings.mode = "redo"
tesseract_parser.settings.mode = ModeChoices.REDO
tesseract_parser.parse(
tesseract_samples_dir / "single-page-mixed.pdf",
"application/pdf",
@ -989,7 +990,7 @@ class TestParseMixed:
THEN:
- No archive created (produce_archive=False); text from text layer present
"""
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.parse(
tesseract_samples_dir / "multi-page-mixed.pdf",
"application/pdf",
@ -1013,7 +1014,7 @@ class TestParseRotate:
tesseract_parser: RasterisedDocumentParser,
tesseract_samples_dir: Path,
) -> None:
tesseract_parser.settings.mode = "auto"
tesseract_parser.settings.mode = ModeChoices.AUTO
tesseract_parser.settings.rotate = True
tesseract_parser.parse(tesseract_samples_dir / "rotated.pdf", "application/pdf")
assert_ordered_substrings(
@ -1052,7 +1053,7 @@ class TestParseRtl:
force-ocr with English Tesseract (producing garbage). Using mode="off" forces
skip_text=True so the Arabic text layer is preserved through PDF/A conversion.
"""
tesseract_parser.settings.mode = "off"
tesseract_parser.settings.mode = ModeChoices.OFF
tesseract_parser.parse(
tesseract_samples_dir / "rtl-test.pdf",
"application/pdf",