mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-04-24 18:09:57 -04:00
Updates various small typing things
This commit is contained in:
parent
1a3b56496a
commit
0ad7974972
@ -6,6 +6,7 @@ import tempfile
|
||||
from itertools import islice
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Any
|
||||
|
||||
from allauth.mfa.models import Authenticator
|
||||
from allauth.socialaccount.models import SocialAccount
|
||||
@ -68,7 +69,7 @@ from paperless_mail.models import MailRule
|
||||
|
||||
|
||||
def serialize_queryset_batched(
|
||||
queryset: "QuerySet",
|
||||
queryset: "QuerySet[Any]",
|
||||
*,
|
||||
batch_size: int = 500,
|
||||
) -> "Generator[list[dict], None, None]":
|
||||
@ -364,7 +365,7 @@ class Command(CryptMixin, PaperlessCommand):
|
||||
|
||||
# 2. Create manifest, containing all correspondents, types, tags, storage paths
|
||||
# note, documents and ui_settings
|
||||
manifest_key_to_object_query: dict[str, QuerySet] = {
|
||||
manifest_key_to_object_query: dict[str, QuerySet[Any]] = {
|
||||
"correspondents": Correspondent.objects.all(),
|
||||
"tags": Tag.objects.all(),
|
||||
"document_types": DocumentType.objects.all(),
|
||||
|
||||
@ -261,7 +261,7 @@ def get_objects_for_user_owner_aware(
|
||||
Model: Any,
|
||||
*,
|
||||
include_deleted: bool = False,
|
||||
) -> QuerySet:
|
||||
) -> QuerySet[Any]:
|
||||
"""
|
||||
Returns objects the user owns, are unowned, or has explicit perms.
|
||||
When include_deleted is True, soft-deleted items are also included.
|
||||
|
||||
@ -4,6 +4,7 @@ from __future__ import annotations
|
||||
|
||||
import io
|
||||
from typing import TYPE_CHECKING
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
from django.core.management import CommandError
|
||||
@ -122,7 +123,7 @@ def mock_queryset():
|
||||
This verifies we use .count() instead of len() for querysets.
|
||||
"""
|
||||
|
||||
class MockQuerySet(QuerySet):
|
||||
class MockQuerySet(QuerySet[Any]):
|
||||
def __init__(self, items: list):
|
||||
self._items = items
|
||||
self.count_called = False
|
||||
|
||||
@ -2064,11 +2064,11 @@ class TestWorkflows(
|
||||
format="json",
|
||||
)
|
||||
|
||||
view_users_perms: QuerySet = get_users_with_perms(
|
||||
view_users_perms: QuerySet[Any] = get_users_with_perms(
|
||||
doc,
|
||||
only_with_perms_in=["view_document"],
|
||||
)
|
||||
change_users_perms: QuerySet = get_users_with_perms(
|
||||
change_users_perms: QuerySet[Any] = get_users_with_perms(
|
||||
doc,
|
||||
only_with_perms_in=["change_document"],
|
||||
)
|
||||
@ -2079,7 +2079,7 @@ class TestWorkflows(
|
||||
self.assertIn(self.user3, view_users_perms)
|
||||
self.assertIn(self.user3, change_users_perms)
|
||||
|
||||
group_perms: QuerySet = get_groups_with_perms(doc)
|
||||
group_perms: QuerySet[Any] = get_groups_with_perms(doc)
|
||||
# group1 should still have permissions
|
||||
self.assertIn(self.group1, group_perms)
|
||||
# group2 should have been added
|
||||
@ -2845,7 +2845,7 @@ class TestWorkflows(
|
||||
self.assertEqual(doc.custom_fields.all().count(), 0)
|
||||
self.assertFalse(self.user3.has_perm("documents.view_document", doc))
|
||||
self.assertFalse(self.user3.has_perm("documents.change_document", doc))
|
||||
group_perms: QuerySet = get_groups_with_perms(doc)
|
||||
group_perms: QuerySet[Any] = get_groups_with_perms(doc)
|
||||
self.assertNotIn(self.group1, group_perms)
|
||||
|
||||
def test_removal_action_document_updated_removeall(self) -> None:
|
||||
@ -2916,7 +2916,7 @@ class TestWorkflows(
|
||||
self.assertEqual(doc.custom_fields.all().count(), 0)
|
||||
self.assertFalse(self.user3.has_perm("documents.view_document", doc))
|
||||
self.assertFalse(self.user3.has_perm("documents.change_document", doc))
|
||||
group_perms: QuerySet = get_groups_with_perms(doc)
|
||||
group_perms: QuerySet[Any] = get_groups_with_perms(doc)
|
||||
self.assertNotIn(self.group1, group_perms)
|
||||
|
||||
def test_removal_action_document_consumed(self) -> None:
|
||||
|
||||
@ -1,6 +1,4 @@
|
||||
import grp
|
||||
import os
|
||||
import pwd
|
||||
import shutil
|
||||
import stat
|
||||
import subprocess
|
||||
@ -38,8 +36,8 @@ def path_check(var: str, directory: Path) -> list[Error]:
|
||||
except PermissionError:
|
||||
dir_stat: os.stat_result = Path(directory).stat()
|
||||
dir_mode: str = stat.filemode(dir_stat.st_mode)
|
||||
dir_owner: str = pwd.getpwuid(dir_stat.st_uid).pw_name
|
||||
dir_group: str = grp.getgrgid(dir_stat.st_gid).gr_name
|
||||
dir_owner: str = ""
|
||||
dir_group: str = ""
|
||||
messages.append(
|
||||
Error(
|
||||
writeable_message.format(var),
|
||||
|
||||
@ -38,7 +38,9 @@ class OutputTypeConfig(BaseConfig):
|
||||
def __post_init__(self) -> None:
|
||||
app_config = self._get_config_instance()
|
||||
|
||||
self.output_type = app_config.output_type or settings.OCR_OUTPUT_TYPE
|
||||
self.output_type = app_config.output_type or OutputTypeChoices(
|
||||
settings.OCR_OUTPUT_TYPE,
|
||||
)
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
@ -70,12 +72,13 @@ class OcrConfig(OutputTypeConfig):
|
||||
|
||||
self.pages = app_config.pages or settings.OCR_PAGES
|
||||
self.language = app_config.language or settings.OCR_LANGUAGE
|
||||
self.mode = app_config.mode or settings.OCR_MODE
|
||||
self.mode = app_config.mode or ModeChoices(settings.OCR_MODE)
|
||||
self.archive_file_generation = (
|
||||
app_config.archive_file_generation or settings.ARCHIVE_FILE_GENERATION
|
||||
app_config.archive_file_generation
|
||||
or ArchiveFileGenerationChoices(settings.ARCHIVE_FILE_GENERATION)
|
||||
)
|
||||
self.image_dpi = app_config.image_dpi or settings.OCR_IMAGE_DPI
|
||||
self.clean = app_config.unpaper_clean or settings.OCR_CLEAN
|
||||
self.clean = app_config.unpaper_clean or CleanChoices(settings.OCR_CLEAN)
|
||||
self.deskew = (
|
||||
app_config.deskew if app_config.deskew is not None else settings.OCR_DESKEW
|
||||
)
|
||||
@ -92,7 +95,7 @@ class OcrConfig(OutputTypeConfig):
|
||||
)
|
||||
self.color_conversion_strategy = (
|
||||
app_config.color_conversion_strategy
|
||||
or settings.OCR_COLOR_CONVERSION_STRATEGY
|
||||
or ColorConvertChoices(settings.OCR_COLOR_CONVERSION_STRATEGY)
|
||||
)
|
||||
|
||||
user_args = None
|
||||
|
||||
@ -16,6 +16,8 @@ from typing import TYPE_CHECKING
|
||||
|
||||
import pytest
|
||||
|
||||
from paperless.models import ModeChoices
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from pytest_mock import MockerFixture
|
||||
|
||||
@ -72,7 +74,7 @@ class TestAutoModeWithText:
|
||||
)
|
||||
mock_ocr = mocker.patch("ocrmypdf.ocr")
|
||||
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
simple_digital_pdf_file,
|
||||
"application/pdf",
|
||||
@ -106,7 +108,7 @@ class TestAutoModeWithText:
|
||||
)
|
||||
mock_ocr = mocker.patch("ocrmypdf.ocr")
|
||||
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
simple_digital_pdf_file,
|
||||
"application/pdf",
|
||||
@ -158,7 +160,7 @@ class TestAutoModeNoText:
|
||||
mocker.patch.object(tesseract_parser, "extract_text", side_effect=_extract_side)
|
||||
mock_ocr = mocker.patch("ocrmypdf.ocr")
|
||||
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
multi_page_images_pdf_file,
|
||||
"application/pdf",
|
||||
@ -200,7 +202,7 @@ class TestAutoModeNoText:
|
||||
mocker.patch.object(tesseract_parser, "extract_text", side_effect=_extract_side)
|
||||
mock_ocr = mocker.patch("ocrmypdf.ocr")
|
||||
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
multi_page_images_pdf_file,
|
||||
"application/pdf",
|
||||
@ -243,7 +245,7 @@ class TestOffModePdf:
|
||||
)
|
||||
mock_ocr = mocker.patch("ocrmypdf.ocr")
|
||||
|
||||
tesseract_parser.settings.mode = "off"
|
||||
tesseract_parser.settings.mode = ModeChoices.OFF
|
||||
tesseract_parser.parse(
|
||||
simple_digital_pdf_file,
|
||||
"application/pdf",
|
||||
@ -283,7 +285,7 @@ class TestOffModePdf:
|
||||
)
|
||||
mocker.patch("ocrmypdf.pdfa.generate_pdfa_ps")
|
||||
|
||||
tesseract_parser.settings.mode = "off"
|
||||
tesseract_parser.settings.mode = ModeChoices.OFF
|
||||
tesseract_parser.parse(
|
||||
simple_digital_pdf_file,
|
||||
"application/pdf",
|
||||
@ -323,7 +325,7 @@ class TestOffModeImage:
|
||||
"""
|
||||
mock_ocr = mocker.patch("ocrmypdf.ocr")
|
||||
|
||||
tesseract_parser.settings.mode = "off"
|
||||
tesseract_parser.settings.mode = ModeChoices.OFF
|
||||
tesseract_parser.parse(simple_png_file, "image/png", produce_archive=False)
|
||||
|
||||
mock_ocr.assert_not_called()
|
||||
@ -355,7 +357,7 @@ class TestOffModeImage:
|
||||
)
|
||||
mock_ocr = mocker.patch("ocrmypdf.ocr")
|
||||
|
||||
tesseract_parser.settings.mode = "off"
|
||||
tesseract_parser.settings.mode = ModeChoices.OFF
|
||||
tesseract_parser.parse(simple_png_file, "image/png", produce_archive=True)
|
||||
|
||||
mock_convert.assert_called_once_with(simple_png_file)
|
||||
@ -429,7 +431,7 @@ class TestProduceArchiveFalse:
|
||||
)
|
||||
mock_ocr = mocker.patch("ocrmypdf.ocr")
|
||||
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
simple_digital_pdf_file,
|
||||
"application/pdf",
|
||||
|
||||
@ -18,6 +18,7 @@ from ocrmypdf import SubprocessOutputError
|
||||
|
||||
from documents.parsers import ParseError
|
||||
from documents.parsers import run_convert
|
||||
from paperless.models import ModeChoices
|
||||
from paperless.parsers import ParserProtocol
|
||||
from paperless.parsers.tesseract import RasterisedDocumentParser
|
||||
from paperless.parsers.tesseract import post_process_text
|
||||
@ -413,7 +414,7 @@ class TestParsePdf:
|
||||
tesseract_parser: RasterisedDocumentParser,
|
||||
tesseract_samples_dir: Path,
|
||||
) -> None:
|
||||
tesseract_parser.settings.mode = "redo"
|
||||
tesseract_parser.settings.mode = ModeChoices.REDO
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "with-form.pdf",
|
||||
"application/pdf",
|
||||
@ -430,7 +431,7 @@ class TestParsePdf:
|
||||
tesseract_parser: RasterisedDocumentParser,
|
||||
tesseract_samples_dir: Path,
|
||||
) -> None:
|
||||
tesseract_parser.settings.mode = "force"
|
||||
tesseract_parser.settings.mode = ModeChoices.FORCE
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "with-form.pdf",
|
||||
"application/pdf",
|
||||
@ -445,7 +446,7 @@ class TestParsePdf:
|
||||
tesseract_parser: RasterisedDocumentParser,
|
||||
tesseract_samples_dir: Path,
|
||||
) -> None:
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(tesseract_samples_dir / "signed.pdf", "application/pdf")
|
||||
assert tesseract_parser.archive_path is None
|
||||
assert_ordered_substrings(
|
||||
@ -461,7 +462,7 @@ class TestParsePdf:
|
||||
tesseract_parser: RasterisedDocumentParser,
|
||||
tesseract_samples_dir: Path,
|
||||
) -> None:
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "encrypted.pdf",
|
||||
"application/pdf",
|
||||
@ -599,7 +600,7 @@ class TestParseMultiPage:
|
||||
tesseract_parser: RasterisedDocumentParser,
|
||||
tesseract_samples_dir: Path,
|
||||
) -> None:
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "multi-page-images.pdf",
|
||||
"application/pdf",
|
||||
@ -626,7 +627,7 @@ class TestParseMultiPage:
|
||||
- Pages 1 and 2 extracted; page 3 absent
|
||||
"""
|
||||
tesseract_parser.settings.pages = 2
|
||||
tesseract_parser.settings.mode = "redo"
|
||||
tesseract_parser.settings.mode = ModeChoices.REDO
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "multi-page-images.pdf",
|
||||
"application/pdf",
|
||||
@ -652,7 +653,7 @@ class TestParseMultiPage:
|
||||
- Only page 1 extracted
|
||||
"""
|
||||
tesseract_parser.settings.pages = 1
|
||||
tesseract_parser.settings.mode = "force"
|
||||
tesseract_parser.settings.mode = ModeChoices.FORCE
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "multi-page-images.pdf",
|
||||
"application/pdf",
|
||||
@ -754,7 +755,7 @@ class TestSkipArchive:
|
||||
- Text extracted from original; no archive created (text exists +
|
||||
produce_archive=False skips OCRmyPDF entirely)
|
||||
"""
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "multi-page-digital.pdf",
|
||||
"application/pdf",
|
||||
@ -780,7 +781,7 @@ class TestSkipArchive:
|
||||
THEN:
|
||||
- Text extracted; archive created (OCR needed, no existing text)
|
||||
"""
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "multi-page-images.pdf",
|
||||
"application/pdf",
|
||||
@ -838,7 +839,7 @@ class TestSkipArchive:
|
||||
- archive_path is set if and only if produce_archive=True
|
||||
- Text is always extracted
|
||||
"""
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / filename,
|
||||
"application/pdf",
|
||||
@ -868,7 +869,7 @@ class TestSkipArchive:
|
||||
- Text is extracted from the original via pdftotext
|
||||
- No archive is produced
|
||||
"""
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
mock_ocr = mocker.patch("ocrmypdf.ocr")
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "simple-digital.pdf",
|
||||
@ -895,7 +896,7 @@ class TestSkipArchive:
|
||||
- Archive is produced
|
||||
- Text is preserved from the original
|
||||
"""
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "simple-digital.pdf",
|
||||
"application/pdf",
|
||||
@ -925,7 +926,7 @@ class TestParseMixed:
|
||||
THEN:
|
||||
- All pages extracted; archive created; sidecar notes skipped pages
|
||||
"""
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "multi-page-mixed.pdf",
|
||||
"application/pdf",
|
||||
@ -953,7 +954,7 @@ class TestParseMixed:
|
||||
THEN:
|
||||
- Both text layer and image text extracted; archive created
|
||||
"""
|
||||
tesseract_parser.settings.mode = "redo"
|
||||
tesseract_parser.settings.mode = ModeChoices.REDO
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "single-page-mixed.pdf",
|
||||
"application/pdf",
|
||||
@ -989,7 +990,7 @@ class TestParseMixed:
|
||||
THEN:
|
||||
- No archive created (produce_archive=False); text from text layer present
|
||||
"""
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "multi-page-mixed.pdf",
|
||||
"application/pdf",
|
||||
@ -1013,7 +1014,7 @@ class TestParseRotate:
|
||||
tesseract_parser: RasterisedDocumentParser,
|
||||
tesseract_samples_dir: Path,
|
||||
) -> None:
|
||||
tesseract_parser.settings.mode = "auto"
|
||||
tesseract_parser.settings.mode = ModeChoices.AUTO
|
||||
tesseract_parser.settings.rotate = True
|
||||
tesseract_parser.parse(tesseract_samples_dir / "rotated.pdf", "application/pdf")
|
||||
assert_ordered_substrings(
|
||||
@ -1052,7 +1053,7 @@ class TestParseRtl:
|
||||
force-ocr with English Tesseract (producing garbage). Using mode="off" forces
|
||||
skip_text=True so the Arabic text layer is preserved through PDF/A conversion.
|
||||
"""
|
||||
tesseract_parser.settings.mode = "off"
|
||||
tesseract_parser.settings.mode = ModeChoices.OFF
|
||||
tesseract_parser.parse(
|
||||
tesseract_samples_dir / "rtl-test.pdf",
|
||||
"application/pdf",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user