Fixhancement: refactor email attachment logic

This commit is contained in:
shamoon 2025-11-10 11:28:31 -08:00
parent 44f0191bfb
commit f6705ae036
No known key found for this signature in database
4 changed files with 143 additions and 51 deletions

View File

@ -1,25 +1,26 @@
from __future__ import annotations from __future__ import annotations
from dataclasses import dataclass
from email import message_from_bytes from email import message_from_bytes
from typing import TYPE_CHECKING from pathlib import Path
from django.conf import settings from django.conf import settings
from django.core.mail import EmailMessage from django.core.mail import EmailMessage
from filelock import FileLock from filelock import FileLock
from documents.data_models import ConsumableDocument
if TYPE_CHECKING: @dataclass(frozen=True)
from documents.models import Document class EmailAttachment:
path: Path
mime_type: str
friendly_name: str
def send_email( def send_email(
subject: str, subject: str,
body: str, body: str,
to: list[str], to: list[str],
attachments: list[Document | ConsumableDocument], attachments: list[EmailAttachment],
*,
use_archive: bool,
) -> int: ) -> int:
""" """
Send an email with attachments. Send an email with attachments.
@ -28,8 +29,7 @@ def send_email(
subject: Email subject subject: Email subject
body: Email body text body: Email body text
to: List of recipient email addresses to: List of recipient email addresses
attachments: List of documents to attach (the list may be empty) attachments: List of attachments
use_archive: Whether to attach archive versions when available
Returns: Returns:
Number of emails sent Number of emails sent
@ -46,47 +46,41 @@ def send_email(
# Something could be renaming the file concurrently so it can't be attached # Something could be renaming the file concurrently so it can't be attached
with FileLock(settings.MEDIA_LOCK): with FileLock(settings.MEDIA_LOCK):
for document in attachments: for attachment in attachments:
if isinstance(document, ConsumableDocument): filename = _get_unique_filename(
attachment_path = document.original_file attachment.friendly_name,
friendly_filename = document.original_file.name
else:
attachment_path = (
document.archive_path
if use_archive and document.has_archive_version
else document.source_path
)
friendly_filename = _get_unique_filename(
document,
used_filenames, used_filenames,
archive=use_archive and document.has_archive_version,
) )
used_filenames.add(friendly_filename) used_filenames.add(filename)
with attachment_path.open("rb") as f: with attachment.path.open("rb") as f:
content = f.read() content = f.read()
if document.mime_type == "message/rfc822": if attachment.mime_type == "message/rfc822":
# See https://forum.djangoproject.com/t/using-emailmessage-with-an-attached-email-file-crashes-due-to-non-ascii/37981 # See https://forum.djangoproject.com/t/using-emailmessage-with-an-attached-email-file-crashes-due-to-non-ascii/37981
content = message_from_bytes(content) content = message_from_bytes(content)
email.attach( email.attach(
filename=friendly_filename, filename=filename,
content=content, content=content,
mimetype=document.mime_type, mimetype=attachment.mime_type,
) )
return email.send() return email.send()
def _get_unique_filename(doc: Document, used_names: set[str], *, archive: bool) -> str: def _get_unique_filename(friendly_name: str, used_names: set[str]) -> str:
""" """
Constructs a unique friendly filename for the given document. Constructs a unique friendly filename for the given document, append a counter if needed.
"""
if friendly_name not in used_names:
return friendly_name
The filename might not be unique enough, so a counter is appended if needed. stem = Path(friendly_name).stem
""" suffix = "".join(Path(friendly_name).suffixes)
counter = 0
counter = 1
while True: while True:
filename = doc.get_public_filename(archive=archive, counter=counter) filename = f"{stem}_{counter:02}{suffix}"
if filename not in used_names: if filename not in used_names:
return filename return filename
counter += 1 counter += 1

View File

@ -31,11 +31,10 @@ from guardian.shortcuts import remove_perm
from documents import matching from documents import matching
from documents.caching import clear_document_caches from documents.caching import clear_document_caches
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentSource
from documents.file_handling import create_source_path_directory from documents.file_handling import create_source_path_directory
from documents.file_handling import delete_empty_directories from documents.file_handling import delete_empty_directories
from documents.file_handling import generate_unique_filename from documents.file_handling import generate_unique_filename
from documents.mail import EmailAttachment
from documents.mail import send_email from documents.mail import send_email
from documents.models import Correspondent from documents.models import Correspondent
from documents.models import CustomField from documents.models import CustomField
@ -57,6 +56,7 @@ from documents.templating.workflows import parse_w_workflow_placeholders
if TYPE_CHECKING: if TYPE_CHECKING:
from documents.classifier import DocumentClassifier from documents.classifier import DocumentClassifier
from documents.data_models import ConsumableDocument
from documents.data_models import DocumentMetadataOverrides from documents.data_models import DocumentMetadataOverrides
logger = logging.getLogger("paperless.handlers") logger = logging.getLogger("paperless.handlers")
@ -1163,28 +1163,41 @@ def run_workflows(
else "" else ""
) )
try: try:
attachments = [] attachments: list[EmailAttachment] = []
if action.email.include_document: if action.email.include_document:
attachment: EmailAttachment | None = None
if trigger_type in [ if trigger_type in [
WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
WorkflowTrigger.WorkflowTriggerType.SCHEDULED, WorkflowTrigger.WorkflowTriggerType.SCHEDULED,
]: ] and isinstance(document, Document):
# Updated and scheduled can pass the document directly friendly_name = (
attachments = [document] Path(current_filename).name
if current_filename
else document.source_path.name
)
attachment = EmailAttachment(
path=document.source_path,
mime_type=document.mime_type,
friendly_name=friendly_name,
)
elif original_file: elif original_file:
# For consumed and added document is not yet saved, so pass the original file friendly_name = (
attachments = [ Path(current_filename).name
ConsumableDocument( if current_filename
source=DocumentSource.ApiUpload, else original_file.name
original_file=original_file, )
), attachment = EmailAttachment(
] path=original_file,
mime_type=document.mime_type,
friendly_name=friendly_name,
)
if attachment:
attachments = [attachment]
n_messages = send_email( n_messages = send_email(
subject=subject, subject=subject,
body=body, body=body,
to=action.email.to.split(","), to=action.email.to.split(","),
attachments=attachments, attachments=attachments,
use_archive=False,
) )
logger.debug( logger.debug(
f"Sent {n_messages} notification email(s) to {action.email.to}", f"Sent {n_messages} notification email(s) to {action.email.to}",

View File

@ -8,8 +8,10 @@ from typing import TYPE_CHECKING
from unittest import mock from unittest import mock
import pytest import pytest
from django.conf import settings
from django.contrib.auth.models import Group from django.contrib.auth.models import Group
from django.contrib.auth.models import User from django.contrib.auth.models import User
from django.core import mail
from django.test import override_settings from django.test import override_settings
from django.utils import timezone from django.utils import timezone
from guardian.shortcuts import assign_perm from guardian.shortcuts import assign_perm
@ -21,6 +23,8 @@ from pytest_httpx import HTTPXMock
from rest_framework.test import APIClient from rest_framework.test import APIClient
from rest_framework.test import APITestCase from rest_framework.test import APITestCase
from documents.file_handling import create_source_path_directory
from documents.file_handling import generate_unique_filename
from documents.signals.handlers import run_workflows from documents.signals.handlers import run_workflows
from documents.signals.handlers import send_webhook from documents.signals.handlers import send_webhook
@ -2989,6 +2993,70 @@ class TestWorkflows(
mock_email_send.assert_called_once() mock_email_send.assert_called_once()
@override_settings(
PAPERLESS_EMAIL_HOST="localhost",
EMAIL_ENABLED=True,
PAPERLESS_URL="http://localhost:8000",
EMAIL_BACKEND="django.core.mail.backends.locmem.EmailBackend",
)
def test_workflow_email_attachment_uses_storage_filename(self):
"""
GIVEN:
- Document updated workflow with include document action
- Document stored with formatted storage-path filename
WHEN:
- Workflow sends an email
THEN:
- Attachment filename matches the stored filename
"""
trigger = WorkflowTrigger.objects.create(
type=WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED,
)
email_action = WorkflowActionEmail.objects.create(
subject="Test Notification: {doc_title}",
body="Test message: {doc_url}",
to="me@example.com",
include_document=True,
)
action = WorkflowAction.objects.create(
type=WorkflowAction.WorkflowActionType.EMAIL,
email=email_action,
)
workflow = Workflow.objects.create(
name="Workflow attachment filename",
order=0,
)
workflow.triggers.add(trigger)
workflow.actions.add(action)
workflow.save()
storage_path = StoragePath.objects.create(
name="Fancy Path",
path="formatted/{{ document.pk }}/{{ title }}",
)
doc = Document.objects.create(
title="workflow doc",
correspondent=self.c,
checksum="workflow-email-attachment",
mime_type="application/pdf",
storage_path=storage_path,
original_filename="workflow-orig.pdf",
)
# eg what happens in update_filename_and_move_files
generated = generate_unique_filename(doc)
destination = (settings.ORIGINALS_DIR / generated).resolve()
create_source_path_directory(destination)
shutil.copy(self.SAMPLE_DIR / "simple.pdf", destination)
Document.objects.filter(pk=doc.pk).update(filename=generated.as_posix())
run_workflows(WorkflowTrigger.WorkflowTriggerType.DOCUMENT_UPDATED, doc)
self.assertEqual(len(mail.outbox), 1)
attachment_names = [att[0] for att in mail.outbox[0].attachments]
self.assertEqual(attachment_names, [Path(generated).name])
@override_settings( @override_settings(
EMAIL_ENABLED=False, EMAIL_ENABLED=False,
) )

View File

@ -120,6 +120,7 @@ from documents.filters import PaperlessTaskFilterSet
from documents.filters import ShareLinkFilterSet from documents.filters import ShareLinkFilterSet
from documents.filters import StoragePathFilterSet from documents.filters import StoragePathFilterSet
from documents.filters import TagFilterSet from documents.filters import TagFilterSet
from documents.mail import EmailAttachment
from documents.mail import send_email from documents.mail import send_email
from documents.matching import match_correspondents from documents.matching import match_correspondents
from documents.matching import match_document_types from documents.matching import match_document_types
@ -1216,12 +1217,28 @@ class DocumentViewSet(
return HttpResponseForbidden("Insufficient permissions") return HttpResponseForbidden("Insufficient permissions")
try: try:
attachments: list[EmailAttachment] = []
for doc in documents:
attachment_path = (
doc.archive_path
if use_archive_version and doc.has_archive_version
else doc.source_path
)
attachments.append(
EmailAttachment(
path=attachment_path,
mime_type=doc.mime_type,
friendly_name=doc.get_public_filename(
archive=use_archive_version and doc.has_archive_version,
),
),
)
send_email( send_email(
subject=subject, subject=subject,
body=message, body=message,
to=addresses, to=addresses,
attachments=documents, attachments=attachments,
use_archive=use_archive_version,
) )
logger.debug( logger.debug(