mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-05-23 17:52:23 -04:00
* Saving some start on this * At least partially working for the tesseract parser * Problems with migration testing need to figure out * Work around that error * Fixes max m_pixels * Moving the settings to main paperless application * Starting some consumer options * More fixes and work * Fixes these last tests * Fix max_length on OcrSettings.mode field * Fix all fields on Common & Ocr settings serializers * Umbrellla config view * Revert "Umbrellla config view" This reverts commit fbaf9f4be30f89afeb509099180158a3406416a5. * Updates to use a single configuration object for all settings * Squashed commit of the following: commit 8a0a49dd5766094f60462fbfbe62e9921fbd2373 Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 23:02:47 2023 -0800 Fix formatting commit 66b2d90c507b8afd9507813ff555e46198ea33b9 Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 22:36:35 2023 -0800 Refactor frontend data models commit 5723bd8dd823ee855625e250df39393e26709d48 Author: Adam Bogdał <adam@bogdal.pl> Date: Wed Dec 20 01:17:43 2023 +0100 Fix: speed up admin panel for installs with a large number of documents (#5052) commit 9b08ce176199bf9011a6634bb88f616846150d2b Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 15:18:51 2023 -0800 Update PULL_REQUEST_TEMPLATE.md commit a6248bec2d793b7690feed95fcaf5eb34a75bfb6 Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 15:02:05 2023 -0800 Chore: Update Angular to v17 (#4980) commit b1f6f52486d5ba5c04af99b41315eb6428fd1fa8 Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 13:53:56 2023 -0800 Fix: Dont allow null custom_fields property via API (#5063) commit 638d9970fd468d8c02c91d19bd28f8b0796bdcb1 Author: shamoon <4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 13:43:50 2023 -0800 Enhancement: symmetric document links (#4907) commit 5e8de4c1da6eb4eb8f738b20962595c7536b30ec Author: shamoon 
<4887959+shamoon@users.noreply.github.com> Date: Tue Dec 19 12:45:04 2023 -0800 Enhancement: shared icon & shared by me filter (#4859) commit 088bad90306025d3f6b139cbd0ad264a1cbecfe5 Author: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Tue Dec 19 12:04:03 2023 -0800 Bulk updates all the backend libraries (#5061) * Saving some work on frontend config * Very basic but dynamically-generated config form * Saving work on slightly less ugly frontend config * JSON validation for user_args field * Fully dynamic config form * Adds in some additional validators for a nicer error message * Cleaning up the testing and coverage more * Reverts unintentional change * Adds documentation about the settings and the precedence * Couple more commenting and style fixes --------- Co-authored-by: shamoon <4887959+shamoon@users.noreply.github.com>
233 lines
7.9 KiB
Python
233 lines
7.9 KiB
Python
import json
|
|
|
|
from django.test import TestCase
|
|
from django.test import override_settings
|
|
|
|
from documents.tests.utils import DirectoriesMixin
|
|
from documents.tests.utils import FileSystemAssertsMixin
|
|
from paperless.models import ApplicationConfiguration
|
|
from paperless.models import CleanChoices
|
|
from paperless.models import ColorConvertChoices
|
|
from paperless.models import ModeChoices
|
|
from paperless.models import OutputTypeChoices
|
|
from paperless_tesseract.parsers import RasterisedDocumentParser
|
|
|
|
|
|
class TestParserSettingsFromDb(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
    """
    Checks that values stored on the ApplicationConfiguration object are the
    ones used when the OCRmyPDF parameters are constructed, even when the
    Django settings define a different value for the same option.
    """

    @staticmethod
    def get_params():
        """
        Helper to get just the OCRMyPDF parameters from the parser
        """
        return RasterisedDocumentParser(None).construct_ocrmypdf_parameters(
            input_file="input.pdf",
            output_file="output.pdf",
            sidecar_file="sidecar.txt",
            mime_type="application/pdf",
            safe_fallback=False,
        )

    @staticmethod
    def _set_db_config(**fields):
        """
        Helper to set the given fields on the first ApplicationConfiguration
        object and save it.

        Assumes the object already exists; nothing is created here.
        """
        instance = ApplicationConfiguration.objects.first()
        for field_name, value in fields.items():
            setattr(instance, field_name, value)
        instance.save()

    def test_db_settings_ocr_pages(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_PAGES than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_PAGES=10):
            self._set_db_config(pages=5)

            # Build the parameters while the settings override is active
            params = self.get_params()
        self.assertEqual(params["pages"], "1-5")

    def test_db_settings_ocr_language(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_LANGUAGE than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_LANGUAGE="eng+deu"):
            self._set_db_config(language="fra+ita")

            params = self.get_params()
        self.assertEqual(params["language"], "fra+ita")

    def test_db_settings_ocr_output_type(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_OUTPUT_TYPE than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_OUTPUT_TYPE="pdfa-3"):
            self._set_db_config(output_type=OutputTypeChoices.PDF_A)

            params = self.get_params()
        self.assertEqual(params["output_type"], "pdfa")

    def test_db_settings_ocr_mode(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_MODE than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_MODE="redo"):
            self._set_db_config(mode=ModeChoices.SKIP)

            params = self.get_params()
        self.assertTrue(params["skip_text"])
        self.assertNotIn("redo_ocr", params)
        self.assertNotIn("force_ocr", params)

    def test_db_settings_ocr_clean(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_CLEAN than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_CLEAN="clean-final"):
            self._set_db_config(unpaper_clean=CleanChoices.CLEAN)

            params = self.get_params()
        self.assertTrue(params["clean"])
        self.assertNotIn("clean_final", params)

        # The settings value must differ from the database value here too,
        # otherwise the assertions below would pass even if the database
        # configuration were ignored.
        with override_settings(OCR_CLEAN="clean"):
            self._set_db_config(unpaper_clean=CleanChoices.FINAL)

            params = self.get_params()
        self.assertTrue(params["clean_final"])
        self.assertNotIn("clean", params)

    def test_db_settings_ocr_deskew(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_DESKEW than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_DESKEW=False):
            self._set_db_config(deskew=True)

            params = self.get_params()
        self.assertTrue(params["deskew"])

    def test_db_settings_ocr_rotate(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_ROTATE_PAGES
              and OCR_ROTATE_PAGES_THRESHOLD than configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_ROTATE_PAGES=False, OCR_ROTATE_PAGES_THRESHOLD=30.0):
            self._set_db_config(rotate_pages=True, rotate_pages_threshold=15.0)

            params = self.get_params()
        self.assertTrue(params["rotate_pages"])
        self.assertAlmostEqual(params["rotate_pages_threshold"], 15.0)

    def test_db_settings_ocr_max_pixels(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_MAX_IMAGE_PIXELS
              than configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_MAX_IMAGE_PIXELS=2_000_000.0):
            self._set_db_config(max_image_pixels=1_000_000.0)

            params = self.get_params()
        # The configuration stores pixels; the parameter is asserted in
        # megapixels (1_000_000 pixels -> 1.0)
        self.assertAlmostEqual(params["max_image_mpixels"], 1.0)

    def test_db_settings_ocr_color_convert(self):
        """
        GIVEN:
            - Django settings defines different value for
              OCR_COLOR_CONVERSION_STRATEGY than configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(OCR_COLOR_CONVERSION_STRATEGY="LeaveColorUnchanged"):
            self._set_db_config(
                color_conversion_strategy=ColorConvertChoices.INDEPENDENT,
            )

            params = self.get_params()
        self.assertEqual(
            params["color_conversion_strategy"],
            "UseDeviceIndependentColor",
        )

    def test_ocr_user_args(self):
        """
        GIVEN:
            - Django settings defines different value for OCR_USER_ARGS than
              configuration object
        WHEN:
            - OCR parameters are constructed
        THEN:
            - Configuration from database is utilized
        """
        with override_settings(
            OCR_USER_ARGS=json.dumps({"continue_on_soft_render_error": True}),
        ):
            self._set_db_config(user_args={"unpaper_args": "--pre-rotate 90"})

            params = self.get_params()

        self.assertIn("unpaper_args", params)
        self.assertEqual(
            params["unpaper_args"],
            "--pre-rotate 90",
        )
|