mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-04 03:27:12 -05:00 
			
		
		
		
	add support for archive files.
This commit is contained in:
		
							parent
							
								
									9a33f191a7
								
							
						
					
					
						commit
						8069c2eb6a
					
				@ -134,6 +134,7 @@ class Consumer(LoggingMixin):
 | 
				
			|||||||
            self.log("debug", "Parsing {}...".format(self.filename))
 | 
					            self.log("debug", "Parsing {}...".format(self.filename))
 | 
				
			||||||
            text = document_parser.get_text()
 | 
					            text = document_parser.get_text()
 | 
				
			||||||
            date = document_parser.get_date()
 | 
					            date = document_parser.get_date()
 | 
				
			||||||
 | 
					            archive_path = document_parser.get_archive_path()
 | 
				
			||||||
        except ParseError as e:
 | 
					        except ParseError as e:
 | 
				
			||||||
            document_parser.cleanup()
 | 
					            document_parser.cleanup()
 | 
				
			||||||
            raise ConsumerError(e)
 | 
					            raise ConsumerError(e)
 | 
				
			||||||
@ -178,8 +179,16 @@ class Consumer(LoggingMixin):
 | 
				
			|||||||
                # place. If this fails, we'll also rollback the transaction.
 | 
					                # place. If this fails, we'll also rollback the transaction.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                create_source_path_directory(document.source_path)
 | 
					                create_source_path_directory(document.source_path)
 | 
				
			||||||
                self._write(document, self.path, document.source_path)
 | 
					
 | 
				
			||||||
                self._write(document, thumbnail, document.thumbnail_path)
 | 
					                self._write(document.storage_type,
 | 
				
			||||||
 | 
					                            self.path, document.source_path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                self._write(document.storage_type,
 | 
				
			||||||
 | 
					                            thumbnail, document.thumbnail_path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if archive_path and os.path.isfile(archive_path):
 | 
				
			||||||
 | 
					                    self._write(Document.STORAGE_TYPE_UNENCRYPTED,
 | 
				
			||||||
 | 
					                                archive_path, document.archive_path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                # Delete the file only if it was successfully consumed
 | 
					                # Delete the file only if it was successfully consumed
 | 
				
			||||||
                self.log("debug", "Deleting file {}".format(self.path))
 | 
					                self.log("debug", "Deleting file {}".format(self.path))
 | 
				
			||||||
@ -258,10 +267,10 @@ class Consumer(LoggingMixin):
 | 
				
			|||||||
            for tag_id in self.override_tag_ids:
 | 
					            for tag_id in self.override_tag_ids:
 | 
				
			||||||
                document.tags.add(Tag.objects.get(pk=tag_id))
 | 
					                document.tags.add(Tag.objects.get(pk=tag_id))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _write(self, document, source, target):
 | 
					    def _write(self, storage_type, source, target):
 | 
				
			||||||
        with open(source, "rb") as read_file:
 | 
					        with open(source, "rb") as read_file:
 | 
				
			||||||
            with open(target, "wb") as write_file:
 | 
					            with open(target, "wb") as write_file:
 | 
				
			||||||
                if document.storage_type == Document.STORAGE_TYPE_UNENCRYPTED:
 | 
					                if storage_type == Document.STORAGE_TYPE_UNENCRYPTED:
 | 
				
			||||||
                    write_file.write(read_file.read())
 | 
					                    write_file.write(read_file.read())
 | 
				
			||||||
                    return
 | 
					                    return
 | 
				
			||||||
                self.log("debug", "Encrypting")
 | 
					                self.log("debug", "Encrypting")
 | 
				
			||||||
 | 
				
			|||||||
@ -224,6 +224,19 @@ class Document(models.Model):
 | 
				
			|||||||
    def source_file(self):
 | 
					    def source_file(self):
 | 
				
			||||||
        return open(self.source_path, "rb")
 | 
					        return open(self.source_path, "rb")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def archive_path(self):
 | 
				
			||||||
 | 
					        fname = "{:07}{}".format(self.pk, ".pdf")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return os.path.join(
 | 
				
			||||||
 | 
					            settings.ARCHIVE_DIR,
 | 
				
			||||||
 | 
					            fname
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def archive_file(self):
 | 
				
			||||||
 | 
					        return open(self.archive_path, "rb")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @property
 | 
					    @property
 | 
				
			||||||
    def file_name(self):
 | 
					    def file_name(self):
 | 
				
			||||||
        return slugify(str(self)) + self.file_type
 | 
					        return slugify(str(self)) + self.file_type
 | 
				
			||||||
 | 
				
			|||||||
@ -141,6 +141,9 @@ class DocumentParser(LoggingMixin):
 | 
				
			|||||||
        self.tempdir = tempfile.mkdtemp(
 | 
					        self.tempdir = tempfile.mkdtemp(
 | 
				
			||||||
            prefix="paperless-", dir=settings.SCRATCH_DIR)
 | 
					            prefix="paperless-", dir=settings.SCRATCH_DIR)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def get_archive_path(self):
 | 
				
			||||||
 | 
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_thumbnail(self):
 | 
					    def get_thumbnail(self):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        Returns the path to a file we can use as a thumbnail for this document.
 | 
					        Returns the path to a file we can use as a thumbnail for this document.
 | 
				
			||||||
 | 
				
			|||||||
@ -168,11 +168,17 @@ def run_post_consume_script(sender, document, **kwargs):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
@receiver(models.signals.post_delete, sender=Document)
 | 
					@receiver(models.signals.post_delete, sender=Document)
 | 
				
			||||||
def cleanup_document_deletion(sender, instance, using, **kwargs):
 | 
					def cleanup_document_deletion(sender, instance, using, **kwargs):
 | 
				
			||||||
    for f in (instance.source_path, instance.thumbnail_path):
 | 
					    for f in (instance.source_path,
 | 
				
			||||||
 | 
					              instance.archive_path,
 | 
				
			||||||
 | 
					              instance.thumbnail_path):
 | 
				
			||||||
 | 
					        if os.path.isfile(f):
 | 
				
			||||||
            try:
 | 
					            try:
 | 
				
			||||||
                os.unlink(f)
 | 
					                os.unlink(f)
 | 
				
			||||||
        except FileNotFoundError:
 | 
					            except OSError as e:
 | 
				
			||||||
            pass  # The file's already gone, so we're cool with it.
 | 
					                logging.getLogger(__name__).warning(
 | 
				
			||||||
 | 
					                    f"While deleting document {instance.file_name}, the file "
 | 
				
			||||||
 | 
					                    f"{f} could not be deleted: {e}"
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    delete_empty_directories(os.path.dirname(instance.source_path))
 | 
					    delete_empty_directories(os.path.dirname(instance.source_path))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user