From 246f17c6c85ee0d4958e5c6e99f77007a050839c Mon Sep 17 00:00:00 2001 From: Kilian <70379879+kaerbr@users.noreply.github.com> Date: Fri, 13 Jun 2025 19:06:37 +0200 Subject: [PATCH] Enhancement: support import of zipped export (#10073) --- docs/administration.md | 5 +-- .../management/commands/document_importer.py | 16 ++++++-- .../tests/test_management_importer.py | 40 +++++++++++++++++++ 3 files changed, 53 insertions(+), 8 deletions(-) diff --git a/docs/administration.md b/docs/administration.md index bb7055141..0b9974def 100644 --- a/docs/administration.md +++ b/docs/administration.md @@ -333,7 +333,7 @@ must be provided to import. If this value is lost, the export cannot be imported The document importer takes the export produced by the [Document exporter](#exporter) and imports it into paperless. -The importer works just like the exporter. You point it at a directory, +The importer works just like the exporter. You point it at a directory or the generated .zip file, and the script does the rest of the work: ```shell @@ -351,9 +351,6 @@ When you use the provided docker compose script, put the export inside the `export` folder in your paperless source directory. Specify `../export` as the `source`. -Note that .zip files (as can be generated from the exporter) are not supported. You must unzip them into -the target directory first. - !!! 
note Importing from a previous version of Paperless may work, but for best diff --git a/src/documents/management/commands/document_importer.py b/src/documents/management/commands/document_importer.py index 9e3af47e7..282f5c48e 100644 --- a/src/documents/management/commands/document_importer.py +++ b/src/documents/management/commands/document_importer.py @@ -1,9 +1,12 @@ import json import logging import os +import tempfile from collections.abc import Generator from contextlib import contextmanager from pathlib import Path +from zipfile import ZipFile +from zipfile import is_zipfile import tqdm from django.conf import settings @@ -234,14 +237,19 @@ class Command(CryptMixin, BaseCommand): self.manifest_paths = [] self.manifest = [] + # Create a temporary directory for extracting a zip file into it, even if supplied source is no zip file to keep code cleaner. + with tempfile.TemporaryDirectory() as tmp_dir: + if is_zipfile(self.source): + with ZipFile(self.source) as zf: + zf.extractall(tmp_dir) + self.source = Path(tmp_dir) + self._run_import() + + def _run_import(self): self.pre_check() - self.load_metadata() - self.load_manifest_files() - self.check_manifest_validity() - self.decrypt_secret_fields() # see /src/documents/signals/handlers.py diff --git a/src/documents/tests/test_management_importer.py b/src/documents/tests/test_management_importer.py index 5cee9ae47..e700ecdc9 100644 --- a/src/documents/tests/test_management_importer.py +++ b/src/documents/tests/test_management_importer.py @@ -2,6 +2,7 @@ import json import tempfile from io import StringIO from pathlib import Path +from zipfile import ZipFile from django.contrib.auth.models import User from django.core.management import call_command @@ -335,3 +336,42 @@ class TestCommandImport( self.assertIn("Version mismatch:", stdout_str) self.assertIn("importing 2.8.1", stdout_str) + + def test_import_zipped_export(self): + """ + GIVEN: + - A zip file with correct content (manifest.json and version.json inside) + 
WHEN: + - An import is attempted using the zip file as the source + THEN: + - Parsing the empty JSON placeholders raises JSONDecodeError and nothing is written to stdout + """ + + stdout = StringIO() + zip_path = self.dirs.scratch_dir / "export.zip" + + # Create manifest.json and version.json in a temp dir + with tempfile.TemporaryDirectory() as temp_dir: + temp_dir_path = Path(temp_dir) + + (temp_dir_path / "manifest.json").touch() + (temp_dir_path / "version.json").touch() + + # Create the zip file + with ZipFile(zip_path, "w") as zf: + zf.write(temp_dir_path / "manifest.json", arcname="manifest.json") + zf.write(temp_dir_path / "version.json", arcname="version.json") + + # Try to import from the zip file + with self.assertRaises(json.decoder.JSONDecodeError): + call_command( + "document_importer", + "--no-progress-bar", + str(zip_path), + stdout=stdout, + ) + stdout.seek(0) + stdout_str = str(stdout.read()) + + # No errors or warnings should have been written, so the output should be empty. + self.assertEqual(stdout_str, "")