mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 19:17:13 -05:00 
			
		
		
		
	filename handling for archive files.
This commit is contained in:
		
							parent
							
								
									aaa6599283
								
							
						
					
					
						commit
						8a5c782425
					
				@ -193,6 +193,8 @@ class Consumer(LoggingMixin):
 | 
			
		||||
                # After everything is in the database, copy the files into
 | 
			
		||||
                # place. If this fails, we'll also rollback the transaction.
 | 
			
		||||
 | 
			
		||||
                # TODO: not required, since this is done by the file handling
 | 
			
		||||
                #  logic
 | 
			
		||||
                create_source_path_directory(document.source_path)
 | 
			
		||||
 | 
			
		||||
                self._write(document.storage_type,
 | 
			
		||||
 | 
			
		||||
@ -11,10 +11,13 @@ def create_source_path_directory(source_path):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# TODO: also make this work for archive dir
 | 
			
		||||
def delete_empty_directories(directory):
 | 
			
		||||
def delete_empty_directories(directory, root):
 | 
			
		||||
    if not os.path.isdir(directory):
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    # Go up in the directory hierarchy and try to delete all directories
 | 
			
		||||
    directory = os.path.normpath(directory)
 | 
			
		||||
    root = os.path.normpath(settings.ORIGINALS_DIR)
 | 
			
		||||
    root = os.path.normpath(root)
 | 
			
		||||
 | 
			
		||||
    if not directory.startswith(root + os.path.sep):
 | 
			
		||||
        # don't do anything outside our originals folder.
 | 
			
		||||
@ -102,3 +105,8 @@ def generate_filename(doc):
 | 
			
		||||
        filename += ".gpg"
 | 
			
		||||
 | 
			
		||||
    return filename
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def archive_name_from_filename(filename):
 | 
			
		||||
 | 
			
		||||
    return os.path.splitext(filename)[0] + ".pdf"
 | 
			
		||||
 | 
			
		||||
@ -11,6 +11,7 @@ from django.db import models
 | 
			
		||||
from django.utils import timezone
 | 
			
		||||
from django.utils.text import slugify
 | 
			
		||||
 | 
			
		||||
from documents.file_handling import archive_name_from_filename
 | 
			
		||||
from documents.parsers import get_default_file_extension
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -233,9 +234,10 @@ class Document(models.Model):
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def archive_path(self):
 | 
			
		||||
        fname = "{:07}{}".format(self.pk, ".pdf")
 | 
			
		||||
        if self.storage_type == self.STORAGE_TYPE_GPG:
 | 
			
		||||
            fname += ".gpg"
 | 
			
		||||
        if self.filename:
 | 
			
		||||
            fname = archive_name_from_filename(self.filename)
 | 
			
		||||
        else:
 | 
			
		||||
            fname = "{:07}.pdf".format(self.pk)
 | 
			
		||||
 | 
			
		||||
        return os.path.join(
 | 
			
		||||
            settings.ARCHIVE_DIR,
 | 
			
		||||
 | 
			
		||||
@ -13,7 +13,7 @@ from rest_framework.reverse import reverse
 | 
			
		||||
 | 
			
		||||
from .. import index, matching
 | 
			
		||||
from ..file_handling import delete_empty_directories, generate_filename, \
 | 
			
		||||
    create_source_path_directory
 | 
			
		||||
    create_source_path_directory, archive_name_from_filename
 | 
			
		||||
from ..models import Document, Tag
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -175,13 +175,40 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
 | 
			
		||||
        if os.path.isfile(f):
 | 
			
		||||
            try:
 | 
			
		||||
                os.unlink(f)
 | 
			
		||||
                logging.getLogger(__name__).debug(
 | 
			
		||||
                    f"Deleted file {f}.")
 | 
			
		||||
            except OSError as e:
 | 
			
		||||
                logging.getLogger(__name__).warning(
 | 
			
		||||
                    f"While deleting document {instance.file_name}, the file "
 | 
			
		||||
                    f"{f} could not be deleted: {e}"
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
    delete_empty_directories(os.path.dirname(instance.source_path))
 | 
			
		||||
    delete_empty_directories(
 | 
			
		||||
        os.path.dirname(instance.source_path),
 | 
			
		||||
        root=settings.ORIGINALS_DIR
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    delete_empty_directories(
 | 
			
		||||
        os.path.dirname(instance.archive_path),
 | 
			
		||||
        root=settings.ARCHIVE_DIR
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def validate_move(instance, old_path, new_path):
 | 
			
		||||
    if not os.path.isfile(old_path):
 | 
			
		||||
        # Can't do anything if the old file does not exist anymore.
 | 
			
		||||
        logging.getLogger(__name__).fatal(
 | 
			
		||||
            f"Document {str(instance)}: File {old_path} has gone.")
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    if os.path.isfile(new_path):
 | 
			
		||||
        # Can't do anything if the new file already exists. Skip updating file.
 | 
			
		||||
        logging.getLogger(__name__).warning(
 | 
			
		||||
            f"Document {str(instance)}: Cannot rename file "
 | 
			
		||||
            f"since target path {new_path} already exists.")
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    return True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@receiver(models.signals.m2m_changed, sender=Document.tags.through)
 | 
			
		||||
@ -189,55 +216,90 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
 | 
			
		||||
def update_filename_and_move_files(sender, instance, **kwargs):
 | 
			
		||||
 | 
			
		||||
    if not instance.filename:
 | 
			
		||||
        # Can't update the filename if there is not filename to begin with
 | 
			
		||||
        # This happens after the consumer creates a new document.
 | 
			
		||||
        # The PK needs to be set first by saving the document once. When this
 | 
			
		||||
        # happens, the file is not yet in the ORIGINALS_DIR, and thus can't be
 | 
			
		||||
        # renamed anyway. In all other cases, instance.filename will be set.
 | 
			
		||||
        # Can't update the filename if there is no filename to begin with
 | 
			
		||||
        # This happens when the consumer creates a new document.
 | 
			
		||||
        # The document is modified and saved multiple times, and only after
 | 
			
		||||
        # everything is done (i.e., the generated filename is final),
 | 
			
		||||
        # filename will be set to the location where the consumer has put
 | 
			
		||||
        # the file.
 | 
			
		||||
        #
 | 
			
		||||
        # This will in turn cause this logic to move the file where it belongs.
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    old_filename = instance.filename
 | 
			
		||||
    old_path = instance.source_path
 | 
			
		||||
    new_filename = generate_filename(instance)
 | 
			
		||||
 | 
			
		||||
    if new_filename == instance.filename:
 | 
			
		||||
        # Don't do anything if its the same.
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    new_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
 | 
			
		||||
    old_source_path = instance.source_path
 | 
			
		||||
    new_source_path = os.path.join(settings.ORIGINALS_DIR, new_filename)
 | 
			
		||||
 | 
			
		||||
    if not os.path.isfile(old_path):
 | 
			
		||||
        # Can't do anything if the old file does not exist anymore.
 | 
			
		||||
        logging.getLogger(__name__).fatal(
 | 
			
		||||
            f"Document {str(instance)}: File {old_path} has gone.")
 | 
			
		||||
    if not validate_move(instance, old_source_path, new_source_path):
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    if os.path.isfile(new_path):
 | 
			
		||||
        # Can't do anything if the new file already exists. Skip updating file.
 | 
			
		||||
        logging.getLogger(__name__).warning(
 | 
			
		||||
            f"Document {str(instance)}: Cannot rename file "
 | 
			
		||||
            f"since target path {new_path} already exists.")
 | 
			
		||||
    # archive files are optional, archive checksum tells us if we have one,
 | 
			
		||||
    # since this is None for documents without archived files.
 | 
			
		||||
    if instance.archive_checksum:
 | 
			
		||||
        new_archive_filename = archive_name_from_filename(new_filename)
 | 
			
		||||
        old_archive_path = instance.archive_path
 | 
			
		||||
        new_archive_path = os.path.join(settings.ARCHIVE_DIR,
 | 
			
		||||
                                        new_archive_filename)
 | 
			
		||||
 | 
			
		||||
        if not validate_move(instance, old_archive_path, new_archive_path):
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
    create_source_path_directory(new_path)
 | 
			
		||||
        create_source_path_directory(new_archive_path)
 | 
			
		||||
    else:
 | 
			
		||||
        old_archive_path = None
 | 
			
		||||
        new_archive_path = None
 | 
			
		||||
 | 
			
		||||
    create_source_path_directory(new_source_path)
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        os.rename(old_path, new_path)
 | 
			
		||||
        os.rename(old_source_path, new_source_path)
 | 
			
		||||
        if instance.archive_checksum:
 | 
			
		||||
            os.rename(old_archive_path, new_archive_path)
 | 
			
		||||
        instance.filename = new_filename
 | 
			
		||||
        # Don't save here to prevent infinite recursion.
 | 
			
		||||
        Document.objects.filter(pk=instance.pk).update(filename=new_filename)
 | 
			
		||||
 | 
			
		||||
        logging.getLogger(__name__).debug(
 | 
			
		||||
            f"Moved file {old_path} to {new_path}.")
 | 
			
		||||
            f"Moved file {old_source_path} to {new_source_path}.")
 | 
			
		||||
 | 
			
		||||
        logging.getLogger(__name__).debug(
 | 
			
		||||
            f"Moved file {old_archive_path} to {new_archive_path}.")
 | 
			
		||||
 | 
			
		||||
    except OSError as e:
 | 
			
		||||
        instance.filename = old_filename
 | 
			
		||||
        # this happens when we can't move a file. If that's the case for the
 | 
			
		||||
        # archive file, we try our best to revert the changes.
 | 
			
		||||
        try:
 | 
			
		||||
            os.rename(new_source_path, old_source_path)
 | 
			
		||||
            os.rename(new_archive_path, old_archive_path)
 | 
			
		||||
        except:
 | 
			
		||||
            # This is fine, since:
 | 
			
		||||
            # A: if we managed to move source from A to B, we will also manage
 | 
			
		||||
            #  to move it from B to A. If not, we have a serious issue
 | 
			
		||||
            #  that's going to get caught by the santiy checker.
 | 
			
		||||
            #  all files remain in place and will never be overwritten,
 | 
			
		||||
            #  so this is not the end of the world.
 | 
			
		||||
            # B: if moving the orignal file failed, nothing has changed anyway.
 | 
			
		||||
            pass
 | 
			
		||||
    except DatabaseError as e:
 | 
			
		||||
        os.rename(new_path, old_path)
 | 
			
		||||
        os.rename(new_source_path, old_source_path)
 | 
			
		||||
        if instance.archive_checksum:
 | 
			
		||||
            os.rename(new_archive_path, old_archive_path)
 | 
			
		||||
        instance.filename = old_filename
 | 
			
		||||
 | 
			
		||||
    if not os.path.isfile(old_path):
 | 
			
		||||
        delete_empty_directories(os.path.dirname(old_path))
 | 
			
		||||
    if not os.path.isfile(old_source_path):
 | 
			
		||||
        delete_empty_directories(os.path.dirname(old_source_path),
 | 
			
		||||
                                 root=settings.ORIGINALS_DIR)
 | 
			
		||||
 | 
			
		||||
    if old_archive_path and not os.path.isfile(old_archive_path):
 | 
			
		||||
        delete_empty_directories(os.path.dirname(old_archive_path),
 | 
			
		||||
                                 root=settings.ARCHIVE_DIR)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_log_entry(sender, document=None, logging_group=None, **kwargs):
 | 
			
		||||
 | 
			
		||||
@ -13,7 +13,7 @@ from documents.sanity_checker import SanityFailedError
 | 
			
		||||
 | 
			
		||||
def index_optimize():
 | 
			
		||||
    ix = index.open_index()
 | 
			
		||||
    with AsyncWriter(ix) as writer:
 | 
			
		||||
    writer = AsyncWriter(ix)
 | 
			
		||||
    writer.commit(optimize=True)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -110,10 +110,12 @@ class DocumentApiTest(DirectoriesMixin, APITestCase):
 | 
			
		||||
        with open(filename, "wb") as f:
 | 
			
		||||
            f.write(content)
 | 
			
		||||
 | 
			
		||||
        doc = Document.objects.create(title="none", filename=os.path.basename(filename),
 | 
			
		||||
        filename = os.path.basename(filename)
 | 
			
		||||
 | 
			
		||||
        doc = Document.objects.create(title="none", filename=filename,
 | 
			
		||||
                                      mime_type="application/pdf")
 | 
			
		||||
 | 
			
		||||
        with open(os.path.join(self.dirs.archive_dir, "{:07d}.pdf".format(doc.pk)), "wb") as f:
 | 
			
		||||
        with open(doc.archive_path, "wb") as f:
 | 
			
		||||
            f.write(content_archive)
 | 
			
		||||
 | 
			
		||||
        response = self.client.get('/api/documents/{}/download/'.format(doc.pk))
 | 
			
		||||
 | 
			
		||||
@ -321,7 +321,7 @@ class TestDate(TestCase):
 | 
			
		||||
        Path(os.path.join(tmp, "notempty", "file")).touch()
 | 
			
		||||
        os.makedirs(os.path.join(tmp, "notempty", "empty"))
 | 
			
		||||
 | 
			
		||||
        delete_empty_directories(os.path.join(tmp, "notempty", "empty"))
 | 
			
		||||
        delete_empty_directories(os.path.join(tmp, "notempty", "empty"), root=settings.ORIGINALS_DIR)
 | 
			
		||||
        self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
 | 
			
		||||
        self.assertEqual(os.path.isfile(
 | 
			
		||||
            os.path.join(tmp, "notempty", "file")), True)
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user