From dfd370700a66fffccb8b827e292215ec7d24d1f2 Mon Sep 17 00:00:00 2001 From: Trenton H <797416+stumpylog@users.noreply.github.com> Date: Fri, 6 Mar 2026 15:15:33 -0800 Subject: [PATCH] Perf: stream manifest parsing with ijson in document_importer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace bulk json.load of the full manifest (which materializes the entire JSON array into memory) with incremental ijson streaming. Eliminates self.manifest entirely — records are never all in memory at once. - Add ijson>=3.2 dependency - New module-level iter_manifest_records() generator - load_manifest_files() collects paths only; no parsing at load time - check_manifest_validity() streams without accumulating records - decrypt_secret_fields() streams each manifest to a .decrypted.json temp file record-by-record; temp files cleaned up after file copy - _import_files_from_manifest() collects only document records (small fraction of manifest) for the tqdm progress bar Measured on 200 docs + 200 CustomFieldInstances: - Streaming validation: peak memory 3081 KiB -> 333 KiB (89% reduction) - Stream-decrypt to file: peak memory 3081 KiB -> 549 KiB (82% reduction) Co-Authored-By: Claude Sonnet 4.6 --- pyproject.toml | 2 + .../management/commands/document_importer.py | 105 +++++++++++------- src/documents/profiling.py | 71 ++++++++++++ .../tests/test_importer_profile_phase4.py | 101 +++++++++++++++++ .../tests/test_management_importer.py | 25 +++-- uv.lock | 69 ++++++++++++ 6 files changed, 322 insertions(+), 51 deletions(-) create mode 100644 src/documents/profiling.py create mode 100644 src/documents/tests/test_importer_profile_phase4.py diff --git a/pyproject.toml b/pyproject.toml index 6a11163cc..0829adbe1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,6 +49,7 @@ dependencies = [ "flower~=2.0.1", "gotenberg-client~=0.13.1", "httpx-oauth~=0.16", + "ijson>=3.2", "imap-tools~=1.11.0", "jinja2~=3.1.5", "langdetect~=1.0.9", @@ -304,6 +305,7 @@ markers = [ "greenmail: Tests requiring Greenmail service", "date_parsing: Tests which cover date parsing from content or filename", "management: Tests which cover management commands/functionality", + "profiling: Benchmarks for comparing implementation performance (not assertions, inspect output manually)", ] [tool.pytest_env] diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index 5cd743590..c33b6b4ee 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -8,6 +8,7 @@ from pathlib import Path from zipfile import ZipFile from zipfile import is_zipfile +import ijson import tqdm from django.conf import settings from django.contrib.auth.models import Permission @@ -47,6 +48,15 @@ if settings.AUDIT_LOG_ENABLED: from auditlog.registry import auditlog +def iter_manifest_records(path: Path) -> Generator[dict, None, None]: + """Yield records one at a time from a manifest JSON array via ijson.""" + try: + with path.open("rb") as f: + yield from ijson.items(f, "item") + except ijson.JSONError as e: + raise CommandError(f"Failed to parse manifest file {path}: {e}") from e + + @contextmanager def disable_signal(sig, receiver, sender, *, weak: bool | None = None) -> Generator: try: @@ -147,14 +157,9 @@ class Command(CryptMixin, BaseCommand): Loads manifest data from the various JSON files for parsing and loading the database """ main_manifest_path: Path = self.source / "manifest.json" - - with main_manifest_path.open() as infile: - self.manifest = json.load(infile) self.manifest_paths.append(main_manifest_path) for file in Path(self.source).glob("**/*-manifest.json"): - with file.open() as infile: - self.manifest += json.load(infile) self.manifest_paths.append(file) def load_metadata(self) -> None: @@ -236,7 +241,6 @@ class Command(CryptMixin, BaseCommand): self.version: str | None = None self.salt: str | None = None self.manifest_paths = [] - self.manifest = [] # Create a temporary directory for extracting a zip file into it, even if supplied source is no zip file to keep code cleaner. with tempfile.TemporaryDirectory() as tmp_dir: @@ -296,6 +300,9 @@ class Command(CryptMixin, BaseCommand): else: self.stdout.write(self.style.NOTICE("Data only import completed")) + for tmp in getattr(self, "_decrypted_tmp_paths", []): + tmp.unlink(missing_ok=True) + self.stdout.write("Updating search index...") call_command( "document_index", @@ -348,11 +355,12 @@ class Command(CryptMixin, BaseCommand): ) from e self.stdout.write("Checking the manifest") - for record in self.manifest: - # Only check if the document files exist if this is not data only - # We don't care about documents for a data only import - if not self.data_only and record["model"] == "documents.document": - check_document_validity(record) + for manifest_path in self.manifest_paths: + for record in iter_manifest_records(manifest_path): + # Only check if the document files exist if this is not data only + # We don't care about documents for a data only import + if not self.data_only and record["model"] == "documents.document": + check_document_validity(record) def _import_files_from_manifest(self) -> None: settings.ORIGINALS_DIR.mkdir(parents=True, exist_ok=True) @@ -361,11 +369,14 @@ class Command(CryptMixin, BaseCommand): self.stdout.write("Copy files into paperless...") - manifest_documents = list( - filter(lambda r: r["model"] == "documents.document", self.manifest), - ) + document_records = [ + record + for manifest_path in self.manifest_paths + for record in iter_manifest_records(manifest_path) + if record["model"] == "documents.document" + ] - for record in tqdm.tqdm(manifest_documents, disable=self.no_progress_bar): + for record in tqdm.tqdm(document_records, disable=self.no_progress_bar): document = Document.objects.get(pk=record["pk"]) doc_file = record[EXPORTER_FILE_NAME] @@ -418,33 +429,43 @@ class Command(CryptMixin, BaseCommand): document.save() + def _decrypt_record_if_needed(self, record: dict) -> dict: + for crypt_config in self.CRYPT_FIELDS: + if record["model"] == crypt_config["model_name"]: + for field in crypt_config["fields"]: + if record["fields"].get(field): + record["fields"][field] = self.decrypt_string( + value=record["fields"][field], + ) + return record + def decrypt_secret_fields(self) -> None: """ - The converse decryption of some fields out of the export before importing to database + The converse decryption of some fields out of the export before importing to database. + Streams records from each manifest path and writes decrypted content to a temp file. """ - if self.passphrase: - # Salt has been loaded from metadata.json at this point, so it cannot be None - self.setup_crypto(passphrase=self.passphrase, salt=self.salt) - - had_at_least_one_record = False - - for crypt_config in self.CRYPT_FIELDS: - importer_model: str = crypt_config["model_name"] - crypt_fields: str = crypt_config["fields"] - for record in filter( - lambda x: x["model"] == importer_model, - self.manifest, - ): - had_at_least_one_record = True - for field in crypt_fields: - if record["fields"][field]: - record["fields"][field] = self.decrypt_string( - value=record["fields"][field], - ) - - if had_at_least_one_record: - # It's annoying, but the DB is loaded from the JSON directly - # Maybe could change that in the future? - (self.source / "manifest.json").write_text( - json.dumps(self.manifest, indent=2, ensure_ascii=False), - ) + if not self.passphrase: + return + # Salt has been loaded from metadata.json at this point, so it cannot be None + self.setup_crypto(passphrase=self.passphrase, salt=self.salt) + self._decrypted_tmp_paths: list[Path] = [] + new_paths: list[Path] = [] + for manifest_path in self.manifest_paths: + tmp = manifest_path.with_name(manifest_path.stem + ".decrypted.json") + with tmp.open("w", encoding="utf-8") as out: + out.write("[\n") + first = True + for record in iter_manifest_records(manifest_path): + if not first: + out.write(",\n") + json.dump( + self._decrypt_record_if_needed(record), + out, + indent=2, + ensure_ascii=False, + ) + first = False + out.write("\n]\n") + self._decrypted_tmp_paths.append(tmp) + new_paths.append(tmp) + self.manifest_paths = new_paths diff --git a/src/documents/profiling.py b/src/documents/profiling.py new file mode 100644 index 000000000..0c938e6dc --- /dev/null +++ b/src/documents/profiling.py @@ -0,0 +1,71 @@ +""" +Temporary profiling utilities for comparing implementations. + +Usage in a management command or shell:: + + from documents.profiling import profile_block + + with profile_block("new check_sanity"): + messages = check_sanity() + + with profile_block("old check_sanity"): + messages = check_sanity_old() + +Drop this file when done. +""" + +from __future__ import annotations + +import tracemalloc +from contextlib import contextmanager +from time import perf_counter +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from collections.abc import Generator + +from django.db import connection +from django.db import reset_queries +from django.test.utils import override_settings + + +@contextmanager +def profile_block(label: str = "block") -> Generator[None, None, None]: + """Profile memory, wall time, and DB queries for a code block. + + Prints a summary to stdout on exit. Requires no external packages. + Enables DEBUG temporarily to capture Django's query log. + """ + tracemalloc.start() + snapshot_before = tracemalloc.take_snapshot() + + with override_settings(DEBUG=True): + reset_queries() + start = perf_counter() + + yield + + elapsed = perf_counter() - start + queries = list(connection.queries) + + snapshot_after = tracemalloc.take_snapshot() + _, peak = tracemalloc.get_traced_memory() + tracemalloc.stop() + + # Compare snapshots for top allocations + stats = snapshot_after.compare_to(snapshot_before, "lineno") + + query_time = sum(float(q["time"]) for q in queries) + mem_diff = sum(s.size_diff for s in stats) + + print(f"\n{'=' * 60}") # noqa: T201 + print(f" Profile: {label}") # noqa: T201 + print(f"{'=' * 60}") # noqa: T201 + print(f" Wall time: {elapsed:.4f}s") # noqa: T201 + print(f" Queries: {len(queries)} ({query_time:.4f}s)") # noqa: T201 + print(f" Memory delta: {mem_diff / 1024:.1f} KiB") # noqa: T201 + print(f" Peak memory: {peak / 1024:.1f} KiB") # noqa: T201 + print("\n Top 5 allocations:") # noqa: T201 + for stat in stats[:5]: + print(f" {stat}") # noqa: T201 + print(f"{'=' * 60}\n") # noqa: T201 diff --git a/src/documents/tests/test_importer_profile_phase4.py b/src/documents/tests/test_importer_profile_phase4.py new file mode 100644 index 000000000..cc8d236ef --- /dev/null +++ b/src/documents/tests/test_importer_profile_phase4.py @@ -0,0 +1,101 @@ +""" +Phase 4 profiling benchmark: ijson streaming parse vs json.load for manifest files. + +Run with: + uv run pytest src/documents/tests/test_importer_profile_phase4.py \ + -m profiling --override-ini="addopts=" -s +""" + +import json +import shutil +import tempfile +from pathlib import Path + +import pytest +from django.core.management import call_command +from django.test import TestCase + +from documents.management.commands.document_importer import iter_manifest_records +from documents.models import CustomField +from documents.models import CustomFieldInstance +from documents.profiling import profile_block +from documents.tests.factories import DocumentFactory +from documents.tests.utils import DirectoriesMixin +from documents.tests.utils import SampleDirMixin + + +@pytest.mark.profiling +class TestImporterProfilePhase4(DirectoriesMixin, SampleDirMixin, TestCase): + """ + Benchmarks streaming ijson parse vs json.load over exported manifest files. + + Creates 200 documents + 1 custom field + 200 custom field instances, + exports them, then compares the parse step in isolation. + + Does not assert on results — inspect printed profile_block output manually. + """ + + def setUp(self) -> None: + super().setUp() + self.export_dir = Path(tempfile.mkdtemp()) + self.addCleanup(shutil.rmtree, self.export_dir) + + def _create_test_data(self) -> None: + cf = CustomField.objects.create( + name="Phase4 Field", + data_type=CustomField.FieldDataType.STRING, + ) + docs = DocumentFactory.create_batch(200) + for doc in docs: + CustomFieldInstance.objects.create( + field=cf, + document=doc, + value_text=f"value for {doc.pk}", + ) + + def _get_manifest_paths(self) -> list[Path]: + paths = [self.export_dir / "manifest.json"] + paths += list(self.export_dir.glob("**/*-manifest.json")) + return [p for p in paths if p.exists()] + + def test_profile_streaming_vs_json_load(self) -> None: + self._create_test_data() + + call_command( + "document_exporter", + str(self.export_dir), + "--no-progress-bar", + "--data-only", + ) + + manifest_paths = self._get_manifest_paths() + self.assertTrue(manifest_paths, "No manifest files found after export") + + # Baseline: json.load then iterate (original approach — loads all into memory) + with profile_block("baseline: json.load + iterate"): + for path in manifest_paths: + with path.open() as f: + records = json.load(f) + for r in records: + _ = r["model"] # simulate check_manifest_validity + + # New: ijson streaming without accumulation (mirrors check_manifest_validity) + with profile_block("new: ijson streaming (no accumulation)"): + for path in manifest_paths: + for record in iter_manifest_records(path): + _ = record["model"] # process one at a time, no list buildup + + # New: ijson stream-decrypt to temp file (mirrors decrypt_secret_fields) + tmp_path = self.export_dir / "manifest.bench.json" + with profile_block("new: ijson stream to temp file"): + for path in manifest_paths: + with tmp_path.open("w", encoding="utf-8") as out: + out.write("[\n") + first = True + for record in iter_manifest_records(path): + if not first: + out.write(",\n") + json.dump(record, out, ensure_ascii=False) + first = False + out.write("\n]\n") + tmp_path.unlink(missing_ok=True) diff --git a/src/documents/tests/test_management_importer.py b/src/documents/tests/test_management_importer.py index 04045c805..137b4004b 100644 --- a/src/documents/tests/test_management_importer.py +++ b/src/documents/tests/test_management_importer.py @@ -119,15 +119,22 @@ class TestCommandImport( # No read permissions original_path.chmod(0o222) + manifest_path = Path(temp_dir) / "manifest.json" + manifest_path.write_text( + json.dumps( + [ + { + "model": "documents.document", + EXPORTER_FILE_NAME: "original.pdf", + EXPORTER_ARCHIVE_NAME: "archive.pdf", + }, + ], + ), + ) + cmd = Command() cmd.source = Path(temp_dir) - cmd.manifest = [ - { - "model": "documents.document", - EXPORTER_FILE_NAME: "original.pdf", - EXPORTER_ARCHIVE_NAME: "archive.pdf", - }, - ] + cmd.manifest_paths = [manifest_path] cmd.data_only = False with self.assertRaises(CommandError) as cm: cmd.check_manifest_validity() @@ -296,7 +303,7 @@ class TestCommandImport( (self.dirs.scratch_dir / "manifest.json").touch() # We're not building a manifest, so it fails, but this test doesn't care - with self.assertRaises(json.decoder.JSONDecodeError): + with self.assertRaises(CommandError): call_command( "document_importer", "--no-progress-bar", @@ -325,7 +332,7 @@ class TestCommandImport( ) # We're not building a manifest, so it fails, but this test doesn't care - with self.assertRaises(json.decoder.JSONDecodeError): + with self.assertRaises(CommandError): call_command( "document_importer", "--no-progress-bar", diff --git a/uv.lock b/uv.lock index 0b6d7b270..c69635232 100644 --- a/uv.lock +++ b/uv.lock @@ -1748,6 +1748,73 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, ] +[[package]] +name = "ijson" +version = "3.5.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f4/57/60d1a6a512f2f0508d0bc8b4f1cc5616fd3196619b66bd6a01f9155a1292/ijson-3.5.0.tar.gz", hash = "sha256:94688760720e3f5212731b3cb8d30267f9a045fb38fb3870254e7b9504246f31", size = 68658, upload-time = "2026-02-24T03:58:30.974Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/65/da/644343198abca5e0f6e2486063f8d8f3c443ca0ef5e5c890e51ef6032e33/ijson-3.5.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5616311404b858d32740b7ad8b9a799c62165f5ecb85d0a8ed16c21665a90533", size = 88964, upload-time = "2026-02-24T03:56:53.099Z" }, + { url = "https://files.pythonhosted.org/packages/5b/63/8621190aa2baf96156dfd4c632b6aa9f1464411e50b98750c09acc0505ea/ijson-3.5.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9733f94029dd41702d573ef64752e2556e72aea14623d6dbb7a44ca1ccf30fd", size = 60582, upload-time = "2026-02-24T03:56:54.261Z" }, + { url = "https://files.pythonhosted.org/packages/20/31/6a3f041fdd17dacff33b7d7d3ba3df6dca48740108340c6042f974b2ad20/ijson-3.5.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:db8398c6721b98412a4f618da8022550c8b9c5d9214040646071b5deb4d4a393", size = 60632, upload-time = "2026-02-24T03:56:55.159Z" }, + { url = "https://files.pythonhosted.org/packages/e4/68/474541998abbdecfd46a744536878335de89aceb9f085bff1aaf35575ceb/ijson-3.5.0-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c061314845c08163b1784b6076ea5f075372461a32e6916f4e5f211fd4130b64", size = 131988, upload-time = "2026-02-24T03:56:56.35Z" }, + { url = "https://files.pythonhosted.org/packages/cd/32/e05ff8b72a44fe9d192f41c5dcbc35cfa87efc280cdbfe539ffaf4a7535e/ijson-3.5.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1111a1c5ac79119c5d6e836f900c1a53844b50a18af38311baa6bb61e2645aca", size = 138669, upload-time = "2026-02-24T03:56:57.555Z" }, + { url = "https://files.pythonhosted.org/packages/49/b5/955a83b031102c7a602e2c06d03aff0a0e584212f09edb94ccc754d203ac/ijson-3.5.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1e74aff8c681c24002b61b1822f9511d4c384f324f7dbc08c78538e01fdc9fcb", size = 135093, upload-time = "2026-02-24T03:56:59.267Z" }, + { url = "https://files.pythonhosted.org/packages/e8/f2/30250cfcb4d2766669b31f6732689aab2bb91de426a15a3ebe482df7ee48/ijson-3.5.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:739a7229b1b0cc5f7e2785a6e7a5fc915e850d3fed9588d0e89a09f88a417253", size = 138715, upload-time = "2026-02-24T03:57:00.491Z" }, + { url = "https://files.pythonhosted.org/packages/a2/05/785a145d7e75e04e04480d59b6323cd4b1d9013a6cd8643fa635fbc93490/ijson-3.5.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:ef88712160360cab3ca6471a4e5418243f8b267cf1fe1620879d1b5558babc71", size = 133194, upload-time = "2026-02-24T03:57:01.759Z" }, + { url = "https://files.pythonhosted.org/packages/14/eb/80d6f8a748dead4034cea0939494a67d10ccf88d6413bf6e860393139676/ijson-3.5.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6ca0d1b6b5f8166a6248f4309497585fb8553b04bc8179a0260fad636cfdb798", size = 135588, upload-time = "2026-02-24T03:57:03.131Z" }, + { url = "https://files.pythonhosted.org/packages/aa/17/9c63c7688025f3a8c47ea717b8306649c8c7244e49e20a2be4e3515dc75c/ijson-3.5.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:1ebefbe149a6106cc848a3eaf536af51a9b5ccc9082de801389f152dba6ab755", size = 88536, upload-time = "2026-02-24T03:57:06.809Z" }, + { url = "https://files.pythonhosted.org/packages/6f/dd/e15c2400244c117b06585452ebc63ae254f5a6964f712306afd1422daae0/ijson-3.5.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:19e30d9f00f82e64de689c0b8651b9cfed879c184b139d7e1ea5030cec401c21", size = 60499, upload-time = "2026-02-24T03:57:09.155Z" }, + { url = "https://files.pythonhosted.org/packages/77/a9/bf4fe3538a0c965f16b406f180a06105b875da83f0743e36246be64ef550/ijson-3.5.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a04a33ee78a6f27b9b8528c1ca3c207b1df3b8b867a4cf2fcc4109986f35c227", size = 60330, upload-time = "2026-02-24T03:57:10.574Z" }, + { url = "https://files.pythonhosted.org/packages/31/76/6f91bdb019dd978fce1bc5ea1cd620cfc096d258126c91db2c03a20a7f34/ijson-3.5.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7d48dc2984af02eb3c56edfb3f13b3f62f2f3e4fe36f058c8cfc75d93adf4fed", size = 138977, upload-time = "2026-02-24T03:57:11.932Z" }, + { url = "https://files.pythonhosted.org/packages/11/be/bbc983059e48a54b0121ee60042979faed7674490bbe7b2c41560db3f436/ijson-3.5.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f1e73a44844d9adbca9cf2c4132cd875933e83f3d4b23881fcaf82be83644c7d", size = 149785, upload-time = "2026-02-24T03:57:13.255Z" }, + { url = "https://files.pythonhosted.org/packages/6d/81/2fee58f9024a3449aee83edfa7167fb5ccd7e1af2557300e28531bb68e16/ijson-3.5.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7389a56b8562a19948bdf1d7bae3a2edc8c7f86fb59834dcb1c4c722818e645a", size = 149729, upload-time = "2026-02-24T03:57:14.191Z" }, + { url = "https://files.pythonhosted.org/packages/c7/56/f1706761fcc096c9d414b3dcd000b1e6e5c24364c21cfba429837f98ee8d/ijson-3.5.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3176f23f8ebec83f374ed0c3b4e5a0c4db7ede54c005864efebbed46da123608", size = 150697, upload-time = "2026-02-24T03:57:15.855Z" }, + { url = "https://files.pythonhosted.org/packages/d9/6e/ee0d9c875a0193b632b3e9ccd1b22a50685fb510256ad57ba483b6529f77/ijson-3.5.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:6babd88e508630c6ef86c9bebaaf13bb2fb8ec1d8f8868773a03c20253f599bc", size = 142873, upload-time = "2026-02-24T03:57:16.831Z" }, + { url = "https://files.pythonhosted.org/packages/d2/bf/f9d4399d0e6e3fd615035290a71e97c843f17f329b43638c0a01cf112d73/ijson-3.5.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:dc1b3836b174b6db2fa8319f1926fb5445abd195dc963368092103f8579cb8ed", size = 151583, upload-time = "2026-02-24T03:57:17.757Z" }, + { url = "https://files.pythonhosted.org/packages/a2/71/d67e764a712c3590627480643a3b51efcc3afa4ef3cb54ee4c989073c97e/ijson-3.5.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:e9cedc10e40dd6023c351ed8bfc7dcfce58204f15c321c3c1546b9c7b12562a4", size = 88544, upload-time = "2026-02-24T03:57:21.293Z" }, + { url = "https://files.pythonhosted.org/packages/1a/39/f1c299371686153fa3cf5c0736b96247a87a1bee1b7145e6d21f359c505a/ijson-3.5.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3647649f782ee06c97490b43680371186651f3f69bebe64c6083ee7615d185e5", size = 60495, upload-time = "2026-02-24T03:57:22.501Z" }, + { url = "https://files.pythonhosted.org/packages/16/94/b1438e204d75e01541bebe3e668fe3e68612d210e9931ae1611062dd0a56/ijson-3.5.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:90e74be1dce05fce73451c62d1118671f78f47c9f6be3991c82b91063bf01fc9", size = 60325, upload-time = "2026-02-24T03:57:23.332Z" }, + { url = "https://files.pythonhosted.org/packages/30/e2/4aa9c116fa86cc8b0f574f3c3a47409edc1cd4face05d0e589a5a176b05d/ijson-3.5.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:78e9ad73e7be2dd80627504bd5cbf512348c55ce2c06e362ed7683b5220e8568", size = 138774, upload-time = "2026-02-24T03:57:24.683Z" }, + { url = "https://files.pythonhosted.org/packages/d2/d2/738b88752a70c3be1505faa4dcd7110668c2712e582a6a36488ed1e295d4/ijson-3.5.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9577449313cc94be89a4fe4b3e716c65f09cc19636d5a6b2861c4e80dddebd58", size = 149820, upload-time = "2026-02-24T03:57:26.062Z" }, + { url = "https://files.pythonhosted.org/packages/ed/df/0b3ab9f393ca8f72ea03bc896ba9fdc987e90ae08cdb51c32a4ee0c14d5e/ijson-3.5.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3e4c1178fb50aff5f5701a30a5152ead82a14e189ce0f6102fa1b5f10b2f54ff", size = 149747, upload-time = "2026-02-24T03:57:27.308Z" }, + { url = "https://files.pythonhosted.org/packages/cc/a3/b0037119f75131b78cb00acc2657b1a9d0435475f1f2c5f8f5a170b66b9c/ijson-3.5.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0eb402ab026ffb37a918d75af2b7260fe6cfbce13232cc83728a714dd30bd81d", size = 151027, upload-time = "2026-02-24T03:57:28.522Z" }, + { url = "https://files.pythonhosted.org/packages/22/a0/cb344de1862bf09d8f769c9d25c944078c87dd59a1b496feec5ad96309a4/ijson-3.5.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:5b08ee08355f9f729612a8eb9bf69cc14f9310c3b2a487c6f1c3c65d85216ec4", size = 142996, upload-time = "2026-02-24T03:57:29.774Z" }, + { url = "https://files.pythonhosted.org/packages/ca/32/a8ffd67182e02ea61f70f62daf43ded4fa8a830a2520a851d2782460aba8/ijson-3.5.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:bda62b6d48442903e7bf56152108afb7f0f1293c2b9bef2f2c369defea76ab18", size = 152068, upload-time = "2026-02-24T03:57:30.969Z" }, + { url = "https://files.pythonhosted.org/packages/42/65/13e2492d17e19a2084523e18716dc2809159f2287fd2700c735f311e76c4/ijson-3.5.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:4d4b0cd676b8c842f7648c1a783448fac5cd3b98289abd83711b3e275e143524", size = 93019, upload-time = "2026-02-24T03:57:33.976Z" }, + { url = "https://files.pythonhosted.org/packages/33/92/483fc97ece0c3f1cecabf48f6a7a36e89d19369eec462faaeaa34c788992/ijson-3.5.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:252dec3680a48bb82d475e36b4ae1b3a9d7eb690b951bb98a76c5fe519e30188", size = 62714, upload-time = "2026-02-24T03:57:34.819Z" }, + { url = "https://files.pythonhosted.org/packages/4b/88/793fe020a0fe9d9eed4c285cf4a5cfdb0a935708b3bde0d72f35c794b513/ijson-3.5.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:aa1b5dca97d323931fde2501172337384c958914d81a9dac7f00f0d4bfc76bc7", size = 62460, upload-time = "2026-02-24T03:57:35.874Z" }, + { url = "https://files.pythonhosted.org/packages/51/69/f1a2690aa8d4df1f4e262b385e65a933ffdc250b091531bac9a449c19e16/ijson-3.5.0-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:7a5ec7fd86d606094bba6f6f8f87494897102fa4584ef653f3005c51a784c320", size = 199273, upload-time = "2026-02-24T03:57:37.07Z" }, + { url = "https://files.pythonhosted.org/packages/ea/a2/f1346d5299e79b988ab472dc773d5381ec2d57c23cb2f1af3ede4a810e62/ijson-3.5.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:009f41443e1521847701c6d87fa3923c0b1961be3c7e7de90947c8cb92ea7c44", size = 216884, upload-time = "2026-02-24T03:57:38.346Z" }, + { url = "https://files.pythonhosted.org/packages/28/3c/8b637e869be87799e6c2c3c275a30a546f086b1aed77e2b7f11512168c5a/ijson-3.5.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e4c3651d1f9fe2839a93fdf8fd1d5ca3a54975349894249f3b1b572bcc4bd577", size = 207306, upload-time = "2026-02-24T03:57:39.718Z" }, + { url = "https://files.pythonhosted.org/packages/7f/7c/18b1c1df6951ca056782d7580ec40cea4ff9a27a0947d92640d1cc8c4ae3/ijson-3.5.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:945b7abcfcfeae2cde17d8d900870f03536494245dda7ad4f8d056faa303256c", size = 211364, upload-time = "2026-02-24T03:57:40.953Z" }, + { url = "https://files.pythonhosted.org/packages/f3/55/e795812e82851574a9dba8a53fde045378f531ef14110c6fb55dbd23b443/ijson-3.5.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:0574b0a841ff97495c13e9d7260fbf3d85358b061f540c52a123db9dbbaa2ed6", size = 200608, upload-time = "2026-02-24T03:57:42.272Z" }, + { url = "https://files.pythonhosted.org/packages/5c/cd/013c85b4749b57a4cb4c2670014d1b32b8db4ab1a7be92ea7aeb5d7fe7b5/ijson-3.5.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f969ffb2b89c5cdf686652d7fb66252bc72126fa54d416317411497276056a18", size = 205127, upload-time = "2026-02-24T03:57:43.286Z" }, + { url = "https://files.pythonhosted.org/packages/7a/93/0868efe753dc1df80cc405cf0c1f2527a6991643607c741bff8dcb899b3b/ijson-3.5.0-cp314-cp314-macosx_10_15_universal2.whl", hash = "sha256:25a5a6b2045c90bb83061df27cfa43572afa43ba9408611d7bfe237c20a731a9", size = 89094, upload-time = "2026-02-24T03:57:46.115Z" }, + { url = "https://files.pythonhosted.org/packages/24/94/fd5a832a0df52ef5e4e740f14ac8640725d61034a1b0c561e8b5fb424706/ijson-3.5.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8976c54c0b864bc82b951bae06567566ac77ef63b90a773a69cd73aab47f4f4f", size = 60715, upload-time = "2026-02-24T03:57:47.552Z" }, + { url = "https://files.pythonhosted.org/packages/70/79/1b9a90af5732491f9eec751ee211b86b11011e1158c555c06576d52c3919/ijson-3.5.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:859eb2038f7f1b0664df4241957694cc35e6295992d71c98659b22c69b3cbc10", size = 60638, upload-time = "2026-02-24T03:57:48.428Z" }, + { url = "https://files.pythonhosted.org/packages/23/6f/2c551ea980fe56f68710a8d5389cfbd015fc45aaafd17c3c52c346db6aa1/ijson-3.5.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:c911aa02991c7c0d3639b6619b93a93210ff1e7f58bf7225d613abea10adc78e", size = 140667, upload-time = "2026-02-24T03:57:49.314Z" }, + { url = "https://files.pythonhosted.org/packages/25/0e/27b887879ba6a5bc29766e3c5af4942638c952220fd63e1e442674f7883a/ijson-3.5.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:903cbdc350173605220edc19796fbea9b2203c8b3951fb7335abfa8ed37afda8", size = 149850, upload-time = "2026-02-24T03:57:50.329Z" }, + { url = "https://files.pythonhosted.org/packages/da/1e/23e10e1bc04bf31193b21e2960dce14b17dbd5d0c62204e8401c59d62c08/ijson-3.5.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a4549d96ded5b8efa71639b2160235415f6bdb8c83367615e2dbabcb72755c33", size = 149206, upload-time = "2026-02-24T03:57:51.261Z" }, + { url = "https://files.pythonhosted.org/packages/8e/90/e552f6495063b235cf7fa2c592f6597c057077195e517b842a0374fd470c/ijson-3.5.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:6b2dcf6349e6042d83f3f8c39ce84823cf7577eba25bac5aae5e39bbbbbe9c1c", size = 150438, upload-time = "2026-02-24T03:57:52.198Z" }, + { url = "https://files.pythonhosted.org/packages/5c/18/45bf8f297c41b42a1c231d261141097babd953d2c28a07be57ae4c3a1a02/ijson-3.5.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:e44af39e6f8a17e5627dcd89715d8279bf3474153ff99aae031a936e5c5572e5", size = 144369, upload-time = "2026-02-24T03:57:53.22Z" }, + { url = "https://files.pythonhosted.org/packages/9b/3a/deb9772bb2c0cead7ad64f00c3598eec9072bdf511818e70e2c512eeabbe/ijson-3.5.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:9260332304b7e7828db56d43f08fc970a3ab741bf84ff10189361ea1b60c395b", size = 151352, upload-time = "2026-02-24T03:57:54.375Z" }, + { url = "https://files.pythonhosted.org/packages/9f/d9/86f7fac35e0835faa188085ae0579e813493d5261ce056484015ad533445/ijson-3.5.0-cp314-cp314t-macosx_10_15_universal2.whl", hash = "sha256:2ea4b676ec98e374c1df400a47929859e4fa1239274339024df4716e802aa7e4", size = 93069, upload-time = "2026-02-24T03:57:57.849Z" }, + { url = "https://files.pythonhosted.org/packages/33/d2/e7366ed9c6e60228d35baf4404bac01a126e7775ea8ce57f560125ed190a/ijson-3.5.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:014586eec043e23c80be9a923c56c3a0920a0f1f7d17478ce7bc20ba443968ef", size = 62767, upload-time = "2026-02-24T03:57:58.758Z" }, + { url = "https://files.pythonhosted.org/packages/35/8b/3e703e8cc4b3ada79f13b28070b51d9550c578f76d1968657905857b2ddd/ijson-3.5.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:d5b8b886b0248652d437f66e7c5ac318bbdcb2c7137a7e5327a68ca00b286f5f", size = 62467, upload-time = "2026-02-24T03:58:00.261Z" }, + { url = "https://files.pythonhosted.org/packages/21/42/0c91af32c1ee8a957fdac2e051b5780756d05fd34e4b60d94a08d51bac1d/ijson-3.5.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:498fd46ae2349297e43acf97cdc421e711dbd7198418677259393d2acdc62d78", size = 200447, upload-time = "2026-02-24T03:58:01.591Z" }, + { url = "https://files.pythonhosted.org/packages/f9/80/796ea0e391b7e2d45c5b1b451734bba03f81c2984cf955ea5eaa6c4920ad/ijson-3.5.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22a51b4f9b81f12793731cf226266d1de2112c3c04ba4a04117ad4e466897e05", size = 217820, upload-time = "2026-02-24T03:58:02.598Z" }, + { url = "https://files.pythonhosted.org/packages/38/14/52b6613fdda4078c62eb5b4fe3efc724ddc55a4ad524c93de51830107aa3/ijson-3.5.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9636c710dc4ac4a281baa266a64f323b4cc165cec26836af702c44328b59a515", size = 208310, upload-time = "2026-02-24T03:58:04.759Z" }, + { url = "https://files.pythonhosted.org/packages/6a/ad/8b3105a78774fd4a65e534a21d975ef3a77e189489fe3029ebcaeba5e243/ijson-3.5.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f7168a39e8211107666d71b25693fd1b2bac0b33735ef744114c403c6cac21e1", size = 211843, upload-time = "2026-02-24T03:58:05.836Z" }, + { url = "https://files.pythonhosted.org/packages/36/ab/a2739f6072d6e1160581bc3ed32da614c8cced023dcd519d9c5fa66e0425/ijson-3.5.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:8696454245415bc617ab03b0dc3ae4c86987df5dc6a90bad378fe72c5409d89e", size = 200906, upload-time = "2026-02-24T03:58:07.788Z" }, + { url = "https://files.pythonhosted.org/packages/6d/5e/e06c2de3c3d4a9cfb655c1ad08a68fb72838d271072cdd3196576ac4431a/ijson-3.5.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:c21bfb61f71f191565885bf1bc29e0a186292d866b4880637b833848360bdc1b", size = 205495, upload-time = "2026-02-24T03:58:09.163Z" }, + { url = "https://files.pythonhosted.org/packages/d9/3b/d31ecfa63a218978617446159f3d77aab2417a5bd2885c425b176353ff78/ijson-3.5.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:d64c624da0e9d692d6eb0ff63a79656b59d76bf80773a17c5b0f835e4e8ef627", size = 57715, upload-time = "2026-02-24T03:58:24.545Z" }, + { url = "https://files.pythonhosted.org/packages/30/51/b170e646d378e8cccf9637c05edb5419b00c2c4df64b0258c3af5355608e/ijson-3.5.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:876f7df73b7e0d6474f9caa729b9cdbfc8e76de9075a4887dfd689e29e85c4ca", size = 57205, upload-time = "2026-02-24T03:58:25.681Z" }, + { url = "https://files.pythonhosted.org/packages/ef/83/44dbd0231b0a8c6c14d27473d10c4e27dfbce7d5d9a833c79e3e6c33eb40/ijson-3.5.0-pp311-pypy311_pp73-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e7dbff2c8d9027809b0cde663df44f3210da10ea377121d42896fb6ee405dd31", size = 71229, upload-time = "2026-02-24T03:58:27.103Z" }, + { url = "https://files.pythonhosted.org/packages/c8/98/cf84048b7c6cec888826e696a31f45bee7ebcac15e532b6be1fc4c2c9608/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4217a1edc278660679e1197c83a1a2a2d367792bfbb2a3279577f4b59b93730d", size = 71217, upload-time = "2026-02-24T03:58:28.021Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0a/e34c729a87ff67dc6540f6bcc896626158e691d433ab57db0086d73decd2/ijson-3.5.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:04f0fc740311388ee745ba55a12292b722d6f52000b11acbb913982ba5fbdf87", size = 68618, upload-time = "2026-02-24T03:58:28.918Z" }, +] + [[package]] name = "imagehash" version = "4.3.2" @@ -2751,6 +2818,7 @@ dependencies = [ { name = "flower", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "gotenberg-client", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "httpx-oauth", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "ijson", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "imap-tools", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "jinja2", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "langdetect", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, @@ -2898,6 +2966,7 @@ requires-dist = [ { name = "gotenberg-client", specifier = "~=0.13.1" }, { name = "granian", extras = ["uvloop"], marker = "extra == 'webserver'", specifier = "~=2.7.0" }, { name = "httpx-oauth", specifier = "~=0.16" }, + { name = "ijson", specifier = ">=3.2" }, { name = "imap-tools", specifier = "~=1.11.0" }, { name = "jinja2", specifier = "~=3.1.5" }, { name = "langdetect", specifier = "~=1.0.9" },