mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-04 03:27:12 -05:00 
			
		
		
		
	Refactor file consumption task to allow beginnings of a plugin system (#5367)
This commit is contained in:
		
							parent
							
								
									4dbf8d7969
								
							
						
					
					
						commit
						2da5e46386
					
				@ -3,7 +3,6 @@ import re
 | 
				
			|||||||
import tempfile
 | 
					import tempfile
 | 
				
			||||||
from dataclasses import dataclass
 | 
					from dataclasses import dataclass
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
from typing import Final
 | 
					 | 
				
			||||||
from typing import Optional
 | 
					from typing import Optional
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from django.conf import settings
 | 
					from django.conf import settings
 | 
				
			||||||
@ -15,8 +14,9 @@ from PIL import Image
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from documents.converters import convert_from_tiff_to_pdf
 | 
					from documents.converters import convert_from_tiff_to_pdf
 | 
				
			||||||
from documents.data_models import ConsumableDocument
 | 
					from documents.data_models import ConsumableDocument
 | 
				
			||||||
from documents.data_models import DocumentMetadataOverrides
 | 
					from documents.plugins.base import ConsumeTaskPlugin
 | 
				
			||||||
from documents.data_models import DocumentSource
 | 
					from documents.plugins.base import StopConsumeTaskError
 | 
				
			||||||
 | 
					from documents.plugins.helpers import ProgressStatusOptions
 | 
				
			||||||
from documents.utils import copy_basic_file_stats
 | 
					from documents.utils import copy_basic_file_stats
 | 
				
			||||||
from documents.utils import copy_file_with_basic_stats
 | 
					from documents.utils import copy_file_with_basic_stats
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -26,7 +26,7 @@ logger = logging.getLogger("paperless.barcodes")
 | 
				
			|||||||
@dataclass(frozen=True)
 | 
					@dataclass(frozen=True)
 | 
				
			||||||
class Barcode:
 | 
					class Barcode:
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Holds the information about a single barcode and its location
 | 
					    Holds the information about a single barcode and its location in a document
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    page: int
 | 
					    page: int
 | 
				
			||||||
@ -49,77 +49,111 @@ class Barcode:
 | 
				
			|||||||
        return self.value.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX)
 | 
					        return self.value.startswith(settings.CONSUMER_ASN_BARCODE_PREFIX)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class BarcodeReader:
 | 
					class BarcodePlugin(ConsumeTaskPlugin):
 | 
				
			||||||
    def __init__(self, filepath: Path, mime_type: str) -> None:
 | 
					    NAME: str = "BarcodePlugin"
 | 
				
			||||||
        self.file: Final[Path] = filepath
 | 
					 | 
				
			||||||
        self.mime: Final[str] = mime_type
 | 
					 | 
				
			||||||
        self.pdf_file: Path = self.file
 | 
					 | 
				
			||||||
        self.barcodes: list[Barcode] = []
 | 
					 | 
				
			||||||
        self._tiff_conversion_done = False
 | 
					 | 
				
			||||||
        self.temp_dir: Optional[tempfile.TemporaryDirectory] = None
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def able_to_run(self) -> bool:
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Able to run if:
 | 
				
			||||||
 | 
					          - ASN from barcode detection is enabled or
 | 
				
			||||||
 | 
					          - Barcode support is enabled and the mime type is supported
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
        if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
 | 
					        if settings.CONSUMER_BARCODE_TIFF_SUPPORT:
 | 
				
			||||||
            self.SUPPORTED_FILE_MIMES = {"application/pdf", "image/tiff"}
 | 
					            supported_mimes = {"application/pdf", "image/tiff"}
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            self.SUPPORTED_FILE_MIMES = {"application/pdf"}
 | 
					            supported_mimes = {"application/pdf"}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __enter__(self):
 | 
					        return (
 | 
				
			||||||
        if self.supported_mime_type:
 | 
					            settings.CONSUMER_ENABLE_ASN_BARCODE or settings.CONSUMER_ENABLE_BARCODES
 | 
				
			||||||
            self.temp_dir = tempfile.TemporaryDirectory(prefix="paperless-barcodes")
 | 
					        ) and self.input_doc.mime_type in supported_mimes
 | 
				
			||||||
        return self
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __exit__(self, exc_type, exc_val, exc_tb):
 | 
					    def setup(self):
 | 
				
			||||||
        if self.temp_dir is not None:
 | 
					        self.temp_dir = tempfile.TemporaryDirectory(
 | 
				
			||||||
            self.temp_dir.cleanup()
 | 
					            dir=self.base_tmp_dir,
 | 
				
			||||||
            self.temp_dir = None
 | 
					            prefix="barcode",
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        self.pdf_file = self.input_doc.original_file
 | 
				
			||||||
 | 
					        self._tiff_conversion_done = False
 | 
				
			||||||
 | 
					        self.barcodes: list[Barcode] = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @property
 | 
					    def run(self) -> Optional[str]:
 | 
				
			||||||
    def supported_mime_type(self) -> bool:
 | 
					        # Maybe do the conversion of TIFF to PDF
 | 
				
			||||||
        """
 | 
					        self.convert_from_tiff_to_pdf()
 | 
				
			||||||
        Return True if the given mime type is supported for barcodes, false otherwise
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        return self.mime in self.SUPPORTED_FILE_MIMES
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @property
 | 
					        # Locate any barcodes in the files
 | 
				
			||||||
    def asn(self) -> Optional[int]:
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        Search the parsed barcodes for any ASNs.
 | 
					 | 
				
			||||||
        The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
 | 
					 | 
				
			||||||
        is considered the ASN to be used.
 | 
					 | 
				
			||||||
        Returns the detected ASN (or None)
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        asn = None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        if not self.supported_mime_type:
 | 
					 | 
				
			||||||
            return None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # Ensure the barcodes have been read
 | 
					 | 
				
			||||||
        self.detect()
 | 
					        self.detect()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
 | 
					        # Update/overwrite an ASN if possible
 | 
				
			||||||
        asn_text = next(
 | 
					        located_asn = self.asn
 | 
				
			||||||
            (x.value for x in self.barcodes if x.is_asn),
 | 
					        if located_asn is not None:
 | 
				
			||||||
            None,
 | 
					            logger.info(f"Found ASN in barcode: {located_asn}")
 | 
				
			||||||
 | 
					            self.metadata.asn = located_asn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        separator_pages = self.get_separation_pages()
 | 
				
			||||||
 | 
					        if not separator_pages:
 | 
				
			||||||
 | 
					            return "No pages to split on!"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # We have pages to split against
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Note this does NOT use the base_temp_dir, as that will be removed
 | 
				
			||||||
 | 
					        tmp_dir = Path(
 | 
				
			||||||
 | 
					            tempfile.mkdtemp(
 | 
				
			||||||
 | 
					                dir=settings.SCRATCH_DIR,
 | 
				
			||||||
 | 
					                prefix="paperless-barcode-split-",
 | 
				
			||||||
 | 
					            ),
 | 
				
			||||||
 | 
					        ).resolve()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        from documents import tasks
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Create the split document tasks
 | 
				
			||||||
 | 
					        for new_document in self.separate_pages(separator_pages):
 | 
				
			||||||
 | 
					            copy_file_with_basic_stats(new_document, tmp_dir / new_document.name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            task = tasks.consume_file.delay(
 | 
				
			||||||
 | 
					                ConsumableDocument(
 | 
				
			||||||
 | 
					                    # Same source, for templates
 | 
				
			||||||
 | 
					                    source=self.input_doc.source,
 | 
				
			||||||
 | 
					                    mailrule_id=self.input_doc.mailrule_id,
 | 
				
			||||||
 | 
					                    # Can't use same folder or the consume might grab it again
 | 
				
			||||||
 | 
					                    original_file=(tmp_dir / new_document.name).resolve(),
 | 
				
			||||||
 | 
					                ),
 | 
				
			||||||
 | 
					                # All the same metadata
 | 
				
			||||||
 | 
					                self.metadata,
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					            logger.info(f"Created new task {task.id} for {new_document.name}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if asn_text:
 | 
					        # This file is now two or more files
 | 
				
			||||||
            logger.debug(f"Found ASN Barcode: {asn_text}")
 | 
					        self.input_doc.original_file.unlink()
 | 
				
			||||||
            # remove the prefix and remove whitespace
 | 
					 | 
				
			||||||
            asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip()
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # remove non-numeric parts of the remaining string
 | 
					        msg = "Barcode splitting complete!"
 | 
				
			||||||
            asn_text = re.sub(r"\D", "", asn_text)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # now, try parsing the ASN number
 | 
					        # Update the progress to complete
 | 
				
			||||||
            try:
 | 
					        self.status_mgr.send_progress(ProgressStatusOptions.SUCCESS, msg, 100, 100)
 | 
				
			||||||
                asn = int(asn_text)
 | 
					 | 
				
			||||||
            except ValueError as e:
 | 
					 | 
				
			||||||
                logger.warning(f"Failed to parse ASN number because: {e}")
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return asn
 | 
					        # Request the consume task stops
 | 
				
			||||||
 | 
					        raise StopConsumeTaskError(msg)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def cleanup(self) -> None:
 | 
				
			||||||
 | 
					        self.temp_dir.cleanup()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def convert_from_tiff_to_pdf(self):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        May convert a TIFF image into a PDF, if the input is a TIFF and
 | 
				
			||||||
 | 
					        the TIFF has not been made into a PDF
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        # Nothing to do, pdf_file is already assigned correctly
 | 
				
			||||||
 | 
					        if self.input_doc.mime_type != "image/tiff" or self._tiff_conversion_done:
 | 
				
			||||||
 | 
					            return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        self.pdf_file = convert_from_tiff_to_pdf(
 | 
				
			||||||
 | 
					            self.input_doc.original_file,
 | 
				
			||||||
 | 
					            Path(self.temp_dir.name),
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        self._tiff_conversion_done = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
    def read_barcodes_zxing(image: Image) -> list[str]:
 | 
					    def read_barcodes_zxing(image: Image.Image) -> list[str]:
 | 
				
			||||||
        barcodes = []
 | 
					        barcodes = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        import zxingcpp
 | 
					        import zxingcpp
 | 
				
			||||||
@ -135,7 +169,7 @@ class BarcodeReader:
 | 
				
			|||||||
        return barcodes
 | 
					        return barcodes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
    def read_barcodes_pyzbar(image: Image) -> list[str]:
 | 
					    def read_barcodes_pyzbar(image: Image.Image) -> list[str]:
 | 
				
			||||||
        barcodes = []
 | 
					        barcodes = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        from pyzbar import pyzbar
 | 
					        from pyzbar import pyzbar
 | 
				
			||||||
@ -154,18 +188,6 @@ class BarcodeReader:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        return barcodes
 | 
					        return barcodes
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def convert_from_tiff_to_pdf(self):
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        May convert a TIFF image into a PDF, if the input is a TIFF and
 | 
					 | 
				
			||||||
        the TIFF has not been made into a PDF
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        # Nothing to do, pdf_file is already assigned correctly
 | 
					 | 
				
			||||||
        if self.mime != "image/tiff" or self._tiff_conversion_done:
 | 
					 | 
				
			||||||
            return
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        self._tiff_conversion_done = True
 | 
					 | 
				
			||||||
        self.pdf_file = convert_from_tiff_to_pdf(self.file, Path(self.temp_dir.name))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def detect(self) -> None:
 | 
					    def detect(self) -> None:
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        Scan all pages of the PDF as images, updating barcodes and the pages
 | 
					        Scan all pages of the PDF as images, updating barcodes and the pages
 | 
				
			||||||
@ -218,10 +240,45 @@ class BarcodeReader:
 | 
				
			|||||||
        # This file is really borked, allow the consumption to continue
 | 
					        # This file is really borked, allow the consumption to continue
 | 
				
			||||||
        # but it may fail further on
 | 
					        # but it may fail further on
 | 
				
			||||||
        except Exception as e:  # pragma: no cover
 | 
					        except Exception as e:  # pragma: no cover
 | 
				
			||||||
            logger.exception(
 | 
					            logger.warning(
 | 
				
			||||||
                f"Exception during barcode scanning: {e}",
 | 
					                f"Exception during barcode scanning: {e}",
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def asn(self) -> Optional[int]:
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Search the parsed barcodes for any ASNs.
 | 
				
			||||||
 | 
					        The first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
 | 
				
			||||||
 | 
					        is considered the ASN to be used.
 | 
				
			||||||
 | 
					        Returns the detected ASN (or None)
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        asn = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Ensure the barcodes have been read
 | 
				
			||||||
 | 
					        self.detect()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # get the first barcode that starts with CONSUMER_ASN_BARCODE_PREFIX
 | 
				
			||||||
 | 
					        asn_text = next(
 | 
				
			||||||
 | 
					            (x.value for x in self.barcodes if x.is_asn),
 | 
				
			||||||
 | 
					            None,
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if asn_text:
 | 
				
			||||||
 | 
					            logger.debug(f"Found ASN Barcode: {asn_text}")
 | 
				
			||||||
 | 
					            # remove the prefix and remove whitespace
 | 
				
			||||||
 | 
					            asn_text = asn_text[len(settings.CONSUMER_ASN_BARCODE_PREFIX) :].strip()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # remove non-numeric parts of the remaining string
 | 
				
			||||||
 | 
					            asn_text = re.sub(r"\D", "", asn_text)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            # now, try parsing the ASN number
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                asn = int(asn_text)
 | 
				
			||||||
 | 
					            except ValueError as e:
 | 
				
			||||||
 | 
					                logger.warning(f"Failed to parse ASN number because: {e}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return asn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_separation_pages(self) -> dict[int, bool]:
 | 
					    def get_separation_pages(self) -> dict[int, bool]:
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        Search the parsed barcodes for separators and returns a dict of page
 | 
					        Search the parsed barcodes for separators and returns a dict of page
 | 
				
			||||||
@ -251,7 +308,7 @@ class BarcodeReader:
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        document_paths = []
 | 
					        document_paths = []
 | 
				
			||||||
        fname = self.file.stem
 | 
					        fname = self.input_doc.original_file.stem
 | 
				
			||||||
        with Pdf.open(self.pdf_file) as input_pdf:
 | 
					        with Pdf.open(self.pdf_file) as input_pdf:
 | 
				
			||||||
            # Start with an empty document
 | 
					            # Start with an empty document
 | 
				
			||||||
            current_document: list[Page] = []
 | 
					            current_document: list[Page] = []
 | 
				
			||||||
@ -292,58 +349,8 @@ class BarcodeReader:
 | 
				
			|||||||
                with open(savepath, "wb") as out:
 | 
					                with open(savepath, "wb") as out:
 | 
				
			||||||
                    dst.save(out)
 | 
					                    dst.save(out)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                copy_basic_file_stats(self.file, savepath)
 | 
					                copy_basic_file_stats(self.input_doc.original_file, savepath)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                document_paths.append(savepath)
 | 
					                document_paths.append(savepath)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            return document_paths
 | 
					            return document_paths
 | 
				
			||||||
 | 
					 | 
				
			||||||
    def separate(
 | 
					 | 
				
			||||||
        self,
 | 
					 | 
				
			||||||
        source: DocumentSource,
 | 
					 | 
				
			||||||
        overrides: DocumentMetadataOverrides,
 | 
					 | 
				
			||||||
    ) -> bool:
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        Separates the document, based on barcodes and configuration, creating new
 | 
					 | 
				
			||||||
        documents as required in the appropriate location.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        Returns True if a split happened, False otherwise
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        # Do nothing
 | 
					 | 
				
			||||||
        if not self.supported_mime_type:
 | 
					 | 
				
			||||||
            logger.warning(f"Unsupported file format for barcode reader: {self.mime}")
 | 
					 | 
				
			||||||
            return False
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # Does nothing unless needed
 | 
					 | 
				
			||||||
        self.convert_from_tiff_to_pdf()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # Actually read the codes, if any
 | 
					 | 
				
			||||||
        self.detect()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        separator_pages = self.get_separation_pages()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # Also do nothing
 | 
					 | 
				
			||||||
        if not separator_pages:
 | 
					 | 
				
			||||||
            logger.warning("No pages to split on!")
 | 
					 | 
				
			||||||
            return False
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        tmp_dir = Path(tempfile.mkdtemp(prefix="paperless-barcode-split-")).resolve()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        from documents import tasks
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # Create the split document tasks
 | 
					 | 
				
			||||||
        for new_document in self.separate_pages(separator_pages):
 | 
					 | 
				
			||||||
            copy_file_with_basic_stats(new_document, tmp_dir / new_document.name)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            tasks.consume_file.delay(
 | 
					 | 
				
			||||||
                ConsumableDocument(
 | 
					 | 
				
			||||||
                    # Same source, for templates
 | 
					 | 
				
			||||||
                    source=source,
 | 
					 | 
				
			||||||
                    # Can't use same folder or the consume might grab it again
 | 
					 | 
				
			||||||
                    original_file=(tmp_dir / new_document.name).resolve(),
 | 
					 | 
				
			||||||
                ),
 | 
					 | 
				
			||||||
                # All the same metadata
 | 
					 | 
				
			||||||
                overrides,
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
        logger.info("Barcode splitting complete!")
 | 
					 | 
				
			||||||
        return True
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -21,7 +21,6 @@ from filelock import FileLock
 | 
				
			|||||||
from rest_framework.reverse import reverse
 | 
					from rest_framework.reverse import reverse
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from documents.classifier import load_classifier
 | 
					from documents.classifier import load_classifier
 | 
				
			||||||
from documents.data_models import ConsumableDocument
 | 
					 | 
				
			||||||
from documents.data_models import DocumentMetadataOverrides
 | 
					from documents.data_models import DocumentMetadataOverrides
 | 
				
			||||||
from documents.file_handling import create_source_path_directory
 | 
					from documents.file_handling import create_source_path_directory
 | 
				
			||||||
from documents.file_handling import generate_unique_filename
 | 
					from documents.file_handling import generate_unique_filename
 | 
				
			||||||
@ -42,12 +41,83 @@ from documents.parsers import ParseError
 | 
				
			|||||||
from documents.parsers import get_parser_class_for_mime_type
 | 
					from documents.parsers import get_parser_class_for_mime_type
 | 
				
			||||||
from documents.parsers import parse_date
 | 
					from documents.parsers import parse_date
 | 
				
			||||||
from documents.permissions import set_permissions_for_object
 | 
					from documents.permissions import set_permissions_for_object
 | 
				
			||||||
 | 
					from documents.plugins.base import AlwaysRunPluginMixin
 | 
				
			||||||
 | 
					from documents.plugins.base import ConsumeTaskPlugin
 | 
				
			||||||
 | 
					from documents.plugins.base import NoCleanupPluginMixin
 | 
				
			||||||
 | 
					from documents.plugins.base import NoSetupPluginMixin
 | 
				
			||||||
from documents.signals import document_consumption_finished
 | 
					from documents.signals import document_consumption_finished
 | 
				
			||||||
from documents.signals import document_consumption_started
 | 
					from documents.signals import document_consumption_started
 | 
				
			||||||
from documents.utils import copy_basic_file_stats
 | 
					from documents.utils import copy_basic_file_stats
 | 
				
			||||||
from documents.utils import copy_file_with_basic_stats
 | 
					from documents.utils import copy_file_with_basic_stats
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class WorkflowTriggerPlugin(
 | 
				
			||||||
 | 
					    NoCleanupPluginMixin,
 | 
				
			||||||
 | 
					    NoSetupPluginMixin,
 | 
				
			||||||
 | 
					    AlwaysRunPluginMixin,
 | 
				
			||||||
 | 
					    ConsumeTaskPlugin,
 | 
				
			||||||
 | 
					):
 | 
				
			||||||
 | 
					    NAME: str = "WorkflowTriggerPlugin"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def run(self) -> Optional[str]:
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        Get overrides from matching workflows
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        overrides = DocumentMetadataOverrides()
 | 
				
			||||||
 | 
					        for workflow in Workflow.objects.filter(enabled=True).order_by("order"):
 | 
				
			||||||
 | 
					            template_overrides = DocumentMetadataOverrides()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if document_matches_workflow(
 | 
				
			||||||
 | 
					                self.input_doc,
 | 
				
			||||||
 | 
					                workflow,
 | 
				
			||||||
 | 
					                WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
 | 
				
			||||||
 | 
					            ):
 | 
				
			||||||
 | 
					                for action in workflow.actions.all():
 | 
				
			||||||
 | 
					                    if action.assign_title is not None:
 | 
				
			||||||
 | 
					                        template_overrides.title = action.assign_title
 | 
				
			||||||
 | 
					                    if action.assign_tags is not None:
 | 
				
			||||||
 | 
					                        template_overrides.tag_ids = [
 | 
				
			||||||
 | 
					                            tag.pk for tag in action.assign_tags.all()
 | 
				
			||||||
 | 
					                        ]
 | 
				
			||||||
 | 
					                    if action.assign_correspondent is not None:
 | 
				
			||||||
 | 
					                        template_overrides.correspondent_id = (
 | 
				
			||||||
 | 
					                            action.assign_correspondent.pk
 | 
				
			||||||
 | 
					                        )
 | 
				
			||||||
 | 
					                    if action.assign_document_type is not None:
 | 
				
			||||||
 | 
					                        template_overrides.document_type_id = (
 | 
				
			||||||
 | 
					                            action.assign_document_type.pk
 | 
				
			||||||
 | 
					                        )
 | 
				
			||||||
 | 
					                    if action.assign_storage_path is not None:
 | 
				
			||||||
 | 
					                        template_overrides.storage_path_id = (
 | 
				
			||||||
 | 
					                            action.assign_storage_path.pk
 | 
				
			||||||
 | 
					                        )
 | 
				
			||||||
 | 
					                    if action.assign_owner is not None:
 | 
				
			||||||
 | 
					                        template_overrides.owner_id = action.assign_owner.pk
 | 
				
			||||||
 | 
					                    if action.assign_view_users is not None:
 | 
				
			||||||
 | 
					                        template_overrides.view_users = [
 | 
				
			||||||
 | 
					                            user.pk for user in action.assign_view_users.all()
 | 
				
			||||||
 | 
					                        ]
 | 
				
			||||||
 | 
					                    if action.assign_view_groups is not None:
 | 
				
			||||||
 | 
					                        template_overrides.view_groups = [
 | 
				
			||||||
 | 
					                            group.pk for group in action.assign_view_groups.all()
 | 
				
			||||||
 | 
					                        ]
 | 
				
			||||||
 | 
					                    if action.assign_change_users is not None:
 | 
				
			||||||
 | 
					                        template_overrides.change_users = [
 | 
				
			||||||
 | 
					                            user.pk for user in action.assign_change_users.all()
 | 
				
			||||||
 | 
					                        ]
 | 
				
			||||||
 | 
					                    if action.assign_change_groups is not None:
 | 
				
			||||||
 | 
					                        template_overrides.change_groups = [
 | 
				
			||||||
 | 
					                            group.pk for group in action.assign_change_groups.all()
 | 
				
			||||||
 | 
					                        ]
 | 
				
			||||||
 | 
					                    if action.assign_custom_fields is not None:
 | 
				
			||||||
 | 
					                        template_overrides.custom_field_ids = [
 | 
				
			||||||
 | 
					                            field.pk for field in action.assign_custom_fields.all()
 | 
				
			||||||
 | 
					                        ]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    overrides.update(template_overrides)
 | 
				
			||||||
 | 
					        self.metadata.update(overrides)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class ConsumerError(Exception):
 | 
					class ConsumerError(Exception):
 | 
				
			||||||
    pass
 | 
					    pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -602,70 +672,6 @@ class Consumer(LoggingMixin):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        return document
 | 
					        return document
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_workflow_overrides(
        self,
        input_doc: ConsumableDocument,
    ) -> DocumentMetadataOverrides:
        """
        Build the metadata overrides contributed by every enabled workflow
        whose consumption trigger matches the given document.

        Enabled workflows are visited sorted by their "order" field. For a
        matching workflow, each action's assignments are written into a
        per-workflow override object which is merged into the result after
        every action.
        """

        def pks_of(related) -> list:
            # Primary keys of every object reachable through a related manager
            return [obj.pk for obj in related.all()]

        merged = DocumentMetadataOverrides()
        enabled_workflows = Workflow.objects.filter(enabled=True).order_by("order")

        for workflow in enabled_workflows:
            per_workflow = DocumentMetadataOverrides()

            matches = document_matches_workflow(
                input_doc,
                workflow,
                WorkflowTrigger.WorkflowTriggerType.CONSUMPTION,
            )
            if not matches:
                # Nothing to apply for this workflow
                continue

            for action in workflow.actions.all():
                self.log.info(
                    f"Applying overrides in {action} from {workflow}",
                )

                # Plain value
                if action.assign_title is not None:
                    per_workflow.title = action.assign_title

                if action.assign_tags is not None:
                    per_workflow.tag_ids = pks_of(action.assign_tags)

                # Single related objects are stored by primary key
                if action.assign_correspondent is not None:
                    per_workflow.correspondent_id = action.assign_correspondent.pk
                if action.assign_document_type is not None:
                    per_workflow.document_type_id = action.assign_document_type.pk
                if action.assign_storage_path is not None:
                    per_workflow.storage_path_id = action.assign_storage_path.pk
                if action.assign_owner is not None:
                    per_workflow.owner_id = action.assign_owner.pk

                # Permission related collections
                if action.assign_view_users is not None:
                    per_workflow.view_users = pks_of(action.assign_view_users)
                if action.assign_view_groups is not None:
                    per_workflow.view_groups = pks_of(action.assign_view_groups)
                if action.assign_change_users is not None:
                    per_workflow.change_users = pks_of(action.assign_change_users)
                if action.assign_change_groups is not None:
                    per_workflow.change_groups = pks_of(action.assign_change_groups)

                if action.assign_custom_fields is not None:
                    per_workflow.custom_field_ids = pks_of(
                        action.assign_custom_fields,
                    )

                # Merge after each action, with everything gathered so far
                # for this workflow
                merged.update(per_workflow)

        return merged
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _parse_title_placeholders(self, title: str) -> str:
 | 
					    def _parse_title_placeholders(self, title: str) -> str:
 | 
				
			||||||
        local_added = timezone.localtime(timezone.now())
 | 
					        local_added = timezone.localtime(timezone.now())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -3,24 +3,41 @@ import logging
 | 
				
			|||||||
import os
 | 
					import os
 | 
				
			||||||
import shutil
 | 
					import shutil
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
 | 
					from typing import Final
 | 
				
			||||||
 | 
					from typing import Optional
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from django.conf import settings
 | 
					from django.conf import settings
 | 
				
			||||||
from pikepdf import Pdf
 | 
					from pikepdf import Pdf
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from documents.consumer import ConsumerError
 | 
					from documents.consumer import ConsumerError
 | 
				
			||||||
from documents.converters import convert_from_tiff_to_pdf
 | 
					from documents.converters import convert_from_tiff_to_pdf
 | 
				
			||||||
from documents.data_models import ConsumableDocument
 | 
					from documents.plugins.base import ConsumeTaskPlugin
 | 
				
			||||||
 | 
					from documents.plugins.base import NoCleanupPluginMixin
 | 
				
			||||||
 | 
					from documents.plugins.base import NoSetupPluginMixin
 | 
				
			||||||
 | 
					from documents.plugins.base import StopConsumeTaskError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
logger = logging.getLogger("paperless.double_sided")
 | 
					logger = logging.getLogger("paperless.double_sided")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Hardcoded for now, could be made a configurable setting if needed
 | 
					# Hardcoded for now, could be made a configurable setting if needed
 | 
				
			||||||
TIMEOUT_MINUTES = 30
 | 
					TIMEOUT_MINUTES: Final[int] = 30
 | 
				
			||||||
 | 
					TIMEOUT_SECONDS: Final[int] = TIMEOUT_MINUTES * 60
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Used by test cases
 | 
					# Used by test cases
 | 
				
			||||||
STAGING_FILE_NAME = "double-sided-staging.pdf"
 | 
					STAGING_FILE_NAME = "double-sided-staging.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def collate(input_doc: ConsumableDocument) -> str:
 | 
					class CollatePlugin(NoCleanupPluginMixin, NoSetupPluginMixin, ConsumeTaskPlugin):
 | 
				
			||||||
 | 
					    NAME: str = "CollatePlugin"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def able_to_run(self) -> bool:
 | 
				
			||||||
 | 
					        return (
 | 
				
			||||||
 | 
					            settings.CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED
 | 
				
			||||||
 | 
					            and settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME
 | 
				
			||||||
 | 
					            in self.input_doc.original_file.parts
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def run(self) -> Optional[str]:
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        Tries to collate pages from 2 single sided scans of a double sided
 | 
					        Tries to collate pages from 2 single sided scans of a double sided
 | 
				
			||||||
        document.
 | 
					        document.
 | 
				
			||||||
@ -39,33 +56,32 @@ def collate(input_doc: ConsumableDocument) -> str:
 | 
				
			|||||||
        in case of failure.
 | 
					        in case of failure.
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Make sure scratch dir exists, Consumer might not have run yet
 | 
					        if self.input_doc.mime_type == "application/pdf":
 | 
				
			||||||
    settings.SCRATCH_DIR.mkdir(exist_ok=True)
 | 
					            pdf_file = self.input_doc.original_file
 | 
				
			||||||
 | 
					 | 
				
			||||||
    if input_doc.mime_type == "application/pdf":
 | 
					 | 
				
			||||||
        pdf_file = input_doc.original_file
 | 
					 | 
				
			||||||
        elif (
 | 
					        elif (
 | 
				
			||||||
        input_doc.mime_type == "image/tiff"
 | 
					            self.input_doc.mime_type == "image/tiff"
 | 
				
			||||||
            and settings.CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT
 | 
					            and settings.CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT
 | 
				
			||||||
        ):
 | 
					        ):
 | 
				
			||||||
            pdf_file = convert_from_tiff_to_pdf(
 | 
					            pdf_file = convert_from_tiff_to_pdf(
 | 
				
			||||||
            input_doc.original_file,
 | 
					                self.input_doc.original_file,
 | 
				
			||||||
            settings.SCRATCH_DIR,
 | 
					                self.base_tmp_dir,
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
        input_doc.original_file.unlink()
 | 
					            self.input_doc.original_file.unlink()
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
        raise ConsumerError("Unsupported file type for collation of double-sided scans")
 | 
					            raise ConsumerError(
 | 
				
			||||||
 | 
					                "Unsupported file type for collation of double-sided scans",
 | 
				
			||||||
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    staging = settings.SCRATCH_DIR / STAGING_FILE_NAME
 | 
					        staging: Path = settings.SCRATCH_DIR / STAGING_FILE_NAME
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        valid_staging_exists = False
 | 
					        valid_staging_exists = False
 | 
				
			||||||
        if staging.exists():
 | 
					        if staging.exists():
 | 
				
			||||||
        stats = os.stat(str(staging))
 | 
					            stats = staging.stat()
 | 
				
			||||||
            # if the file is older than the timeout, we don't consider
 | 
					            # if the file is older than the timeout, we don't consider
 | 
				
			||||||
            # it valid
 | 
					            # it valid
 | 
				
			||||||
        if dt.datetime.now().timestamp() - stats.st_mtime > TIMEOUT_MINUTES * 60:
 | 
					            if (dt.datetime.now().timestamp() - stats.st_mtime) > TIMEOUT_SECONDS:
 | 
				
			||||||
                logger.warning("Outdated double sided staging file exists, deleting it")
 | 
					                logger.warning("Outdated double sided staging file exists, deleting it")
 | 
				
			||||||
            os.unlink(str(staging))
 | 
					                staging.unlink()
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                valid_staging_exists = True
 | 
					                valid_staging_exists = True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -88,23 +104,24 @@ def collate(input_doc: ConsumableDocument) -> str:
 | 
				
			|||||||
                    # Merged file has the same path, but without the
 | 
					                    # Merged file has the same path, but without the
 | 
				
			||||||
                    # double-sided subdir. Therefore, it is also in the
 | 
					                    # double-sided subdir. Therefore, it is also in the
 | 
				
			||||||
                    # consumption dir and will be picked up for processing
 | 
					                    # consumption dir and will be picked up for processing
 | 
				
			||||||
                old_file = input_doc.original_file
 | 
					                    old_file = self.input_doc.original_file
 | 
				
			||||||
                    new_file = Path(
 | 
					                    new_file = Path(
 | 
				
			||||||
                        *(
 | 
					                        *(
 | 
				
			||||||
                            part
 | 
					                            part
 | 
				
			||||||
                            for part in old_file.with_name(
 | 
					                            for part in old_file.with_name(
 | 
				
			||||||
                                f"{old_file.stem}-collated.pdf",
 | 
					                                f"{old_file.stem}-collated.pdf",
 | 
				
			||||||
                            ).parts
 | 
					                            ).parts
 | 
				
			||||||
                        if part != settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME
 | 
					                            if part
 | 
				
			||||||
 | 
					                            != settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME
 | 
				
			||||||
                        ),
 | 
					                        ),
 | 
				
			||||||
                    )
 | 
					                    )
 | 
				
			||||||
                    # If the user didn't create the subdirs yet, do it for them
 | 
					                    # If the user didn't create the subdirs yet, do it for them
 | 
				
			||||||
                    new_file.parent.mkdir(parents=True, exist_ok=True)
 | 
					                    new_file.parent.mkdir(parents=True, exist_ok=True)
 | 
				
			||||||
                    pdf1.save(new_file)
 | 
					                    pdf1.save(new_file)
 | 
				
			||||||
                logger.info("Collated documents into new file %s", new_file)
 | 
					                logger.info("Collated documents into new file %s", new_file)
 | 
				
			||||||
            return (
 | 
					                raise StopConsumeTaskError(
 | 
				
			||||||
                    "Success. Even numbered pages of double sided scan collated "
 | 
					                    "Success. Even numbered pages of double sided scan collated "
 | 
				
			||||||
                "with odd pages"
 | 
					                    "with odd pages",
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
            finally:
 | 
					            finally:
 | 
				
			||||||
                # Delete staging and recently uploaded file no matter what.
 | 
					                # Delete staging and recently uploaded file no matter what.
 | 
				
			||||||
@ -118,12 +135,13 @@ def collate(input_doc: ConsumableDocument) -> str:
 | 
				
			|||||||
            shutil.move(pdf_file, staging)
 | 
					            shutil.move(pdf_file, staging)
 | 
				
			||||||
            # update access to modification time so we know if the file
 | 
					            # update access to modification time so we know if the file
 | 
				
			||||||
            # is outdated when another file gets uploaded
 | 
					            # is outdated when another file gets uploaded
 | 
				
			||||||
        os.utime(staging, (dt.datetime.now().timestamp(),) * 2)
 | 
					            timestamp = dt.datetime.now().timestamp()
 | 
				
			||||||
 | 
					            os.utime(staging, (timestamp, timestamp))
 | 
				
			||||||
            logger.info(
 | 
					            logger.info(
 | 
				
			||||||
                "Got scan with odd numbered pages of double-sided scan, moved it to %s",
 | 
					                "Got scan with odd numbered pages of double-sided scan, moved it to %s",
 | 
				
			||||||
                staging,
 | 
					                staging,
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
        return (
 | 
					            raise StopConsumeTaskError(
 | 
				
			||||||
                "Received odd numbered pages of double sided scan, waiting up to "
 | 
					                "Received odd numbered pages of double sided scan, waiting up to "
 | 
				
			||||||
            f"{TIMEOUT_MINUTES} minutes for even numbered pages"
 | 
					                f"{TIMEOUT_MINUTES} minutes for even numbered pages",
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										0
									
								
								src/documents/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								src/documents/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										131
									
								
								src/documents/plugins/base.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										131
									
								
								src/documents/plugins/base.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,131 @@
 | 
				
			|||||||
 | 
					import abc
 | 
				
			||||||
 | 
					from pathlib import Path
 | 
				
			||||||
 | 
					from typing import Final
 | 
				
			||||||
 | 
					from typing import Optional
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from documents.data_models import ConsumableDocument
 | 
				
			||||||
 | 
					from documents.data_models import DocumentMetadataOverrides
 | 
				
			||||||
 | 
					from documents.plugins.helpers import ProgressManager
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class StopConsumeTaskError(Exception):
					    """
					    Raised by a plugin's setup or run to exit the asynchronous consume task.

					    The usual reason is that the plugin has created one or more new tasks
					    to execute instead, such as when a barcode has been used to create
					    new documents.
					    """

					    def __init__(self, message: str) -> None:
					        # Initialise the base Exception first so args / str() carry the message
					        super().__init__(message)
					        # Also exposed as a named attribute
					        self.message = message
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class ConsumeTaskPlugin(abc.ABC):
					    """
					    Defines the interface for a plugin for the document consume task
					    Meanings as per RFC2119 (https://datatracker.ietf.org/doc/html/rfc2119)

					    Plugin Implementation

					    The plugin SHALL implement property able_to_run and methods setup, run and cleanup.
					    The plugin property able_to_run SHALL return True if the plugin is able to run, given the conditions, settings and document information.
					    The plugin property able_to_run MAY be hardcoded to return True.
					    The plugin setup SHOULD perform any resource creation or additional initialization needed to run the document.
					    The plugin setup MAY be a non-operation.
					    The plugin cleanup SHOULD perform resource cleanup, including in the event of an error.
					    The plugin cleanup MAY be a non-operation.
					    The plugin run SHALL perform any operations against the document or system state required for the plugin.
					    The plugin run MAY update the document metadata.
					    The plugin run MAY return an informational message.
					    The plugin run MAY raise StopConsumeTaskError to cease any further operations against the document.

					    Plugin Manager Implementation

					    The plugin manager SHALL provide the plugin with the input document, document metadata, progress manager and a created temporary directory.
					    The plugin manager SHALL execute the plugin setup, run and cleanup, in that order IF the plugin property able_to_run is True.
					    The plugin manager SHOULD log the return message of executing a plugin's run.
					    The plugin manager SHALL always execute the plugin cleanup, IF the plugin property able_to_run is True.
					    The plugin manager SHALL cease calling plugins and exit the task IF a plugin raises StopConsumeTaskError.
					    The plugin manager SHOULD return the StopConsumeTaskError message IF a plugin raises StopConsumeTaskError.
					    """

					    NAME: str = "ConsumeTaskPlugin"

					    def __init__(
					        self,
					        input_doc: ConsumableDocument,
					        metadata: DocumentMetadataOverrides,
					        status_mgr: ProgressManager,
					        base_tmp_dir: Path,
					        task_id: str,
					    ) -> None:
					        """
					        Stores everything the plugin manager hands to the plugin.

					        :param input_doc: the document being consumed
					        :param metadata: the document metadata, which run() MAY update
					        :param status_mgr: the progress manager for status messages
					        :param base_tmp_dir: an already created temporary directory
					        :param task_id: identifier of the consume task
					        """
					        super().__init__()
					        self.input_doc = input_doc
					        self.metadata = metadata
					        self.base_tmp_dir: Final = base_tmp_dir
					        self.status_mgr = status_mgr
					        self.task_id: Final = task_id

					    # abc.abstractproperty is deprecated since Python 3.3; stacking property
					    # on top of abstractmethod is the supported spelling and keeps the
					    # attribute abstract for subclasses in the same way
					    @property
					    @abc.abstractmethod
					    def able_to_run(self) -> bool:
					        """
					        Return True if the conditions are met for the plugin to run, False otherwise

					        If False, setup(), run() and cleanup() will not be called
					        """

					    @abc.abstractmethod
					    def setup(self) -> None:
					        """
					        Allows the plugin to perform any additional setup it may need, such as creating
					        a temporary directory, copying a file somewhere, etc.

					        Executed before run()

					        In general, this should be the "light" work, not the bulk of processing
					        """

					    @abc.abstractmethod
					    def run(self) -> Optional[str]:
					        """
					        The bulk of plugin processing, this does whatever action the plugin is for.

					        Executed after setup() and before cleanup()

					        :return: an optional informational message
					        :raises StopConsumeTaskError: MAY be raised to cease any further
					            operations against the document
					        """

					    @abc.abstractmethod
					    def cleanup(self) -> None:
					        """
					        Allows the plugin to execute any cleanup it may require

					        Executed after run(), even in the case of error
					        """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class AlwaysRunPluginMixin(ConsumeTaskPlugin):
					    """
					    Mixin for plugins without preconditions: able_to_run is hardcoded to True
					    """

					    @property
					    def able_to_run(self) -> bool:
					        # No settings or document information are consulted
					        return True
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class NoSetupPluginMixin(ConsumeTaskPlugin):
					    """
					    Mixin for plugins which need no setup: setup() is a non-operation
					    """

					    def setup(self) -> None:
					        # Nothing to create or initialise before run()
					        return None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class NoCleanupPluginMixin(ConsumeTaskPlugin):
					    """
					    Mixin for plugins with no files to clean up: cleanup() is a non-operation
					    """

					    def cleanup(self) -> None:
					        # Nothing to remove after run()
					        return None
 | 
				
			||||||
							
								
								
									
										82
									
								
								src/documents/plugins/helpers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								src/documents/plugins/helpers.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,82 @@
 | 
				
			|||||||
 | 
					import enum
 | 
				
			||||||
 | 
					from typing import TYPE_CHECKING
 | 
				
			||||||
 | 
					from typing import Optional
 | 
				
			||||||
 | 
					from typing import Union
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from asgiref.sync import async_to_sync
 | 
				
			||||||
 | 
					from channels.layers import get_channel_layer
 | 
				
			||||||
 | 
					from channels_redis.pubsub import RedisPubSubChannelLayer
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class ProgressStatusOptions(str, enum.Enum):
					    """
					    The status values which may accompany a progress update.

					    Members are also str instances, so each one compares equal to its
					    plain string value.
					    """

					    # Each value is identical to the member name
					    STARTED = "STARTED"
					    WORKING = "WORKING"
					    SUCCESS = "SUCCESS"
					    FAILED = "FAILED"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class ProgressManager:
					    """
					    Handles sending of progress information via the channel layer, with proper management
					    of the open/close of the layer to ensure messages go out and everything is cleaned up

					    May be used as a context manager: entering opens the channel layer and
					    exiting closes it again.
					    """

					    def __init__(self, filename: str, task_id: Optional[str] = None) -> None:
					        """
					        :param filename: sent as "filename" in every status update
					        :param task_id: sent as "task_id" in every status update
					        """
					        self.filename = filename
					        # Fetched lazily by open() and reset to None by close()
					        self._channel: Optional[RedisPubSubChannelLayer] = None
					        self.task_id = task_id

					    def __enter__(self) -> "ProgressManager":
					        self.open()
					        return self

					    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
					        # Returns None, so an exception raised inside the with block propagates
					        self.close()

					    def open(self) -> None:
					        """
					        If not already opened, gets the default channel layer
					        opened and ready to send messages
					        """
					        if self._channel is None:
					            self._channel = get_channel_layer()

					    def close(self) -> None:
					        """
					        If it was opened, flushes the channel layer
					        """
					        if self._channel is not None:
					            try:
					                # async_to_sync only wraps the coroutine function; the wrapper
					                # it returns has to be called for the flush to actually run
					                async_to_sync(self._channel.flush)()
					            finally:
					                # Forget the layer even if the flush failed, so that a later
					                # open() fetches the channel layer again
					                self._channel = None

					    def send_progress(
					        self,
					        status: ProgressStatusOptions,
					        message: str,
					        current_progress: int,
					        max_progress: int,
					        extra_args: Optional[dict[str, Union[str, int]]] = None,
					    ) -> None:
					        """
					        Sends one "status_update" message to the "status_updates" group.

					        :param status: sent as "status"
					        :param message: sent as "message"
					        :param current_progress: sent as "current_progress"
					        :param max_progress: sent as "max_progress"
					        :param extra_args: optional extra entries merged into the message
					            data; an entry with the same key as a standard one replaces it
					        """
					        # Ensure the layer is open
					        self.open()

					        # Just for IDEs
					        if TYPE_CHECKING:
					            assert self._channel is not None

					        payload = {
					            "type": "status_update",
					            "data": {
					                "filename": self.filename,
					                "task_id": self.task_id,
					                "current_progress": current_progress,
					                "max_progress": max_progress,
					                "status": status,
					                "message": message,
					            },
					        }
					        if extra_args is not None:
					            payload["data"].update(extra_args)

					        # Construct and send the update
					        async_to_sync(self._channel.group_send)("status_updates", payload)
 | 
				
			||||||
@ -2,30 +2,30 @@ import hashlib
 | 
				
			|||||||
import logging
 | 
					import logging
 | 
				
			||||||
import shutil
 | 
					import shutil
 | 
				
			||||||
import uuid
 | 
					import uuid
 | 
				
			||||||
 | 
					from pathlib import Path
 | 
				
			||||||
 | 
					from tempfile import TemporaryDirectory
 | 
				
			||||||
from typing import Optional
 | 
					from typing import Optional
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import tqdm
 | 
					import tqdm
 | 
				
			||||||
from asgiref.sync import async_to_sync
 | 
					 | 
				
			||||||
from celery import Task
 | 
					from celery import Task
 | 
				
			||||||
from celery import shared_task
 | 
					from celery import shared_task
 | 
				
			||||||
from channels.layers import get_channel_layer
 | 
					 | 
				
			||||||
from django.conf import settings
 | 
					from django.conf import settings
 | 
				
			||||||
from django.db import transaction
 | 
					from django.db import transaction
 | 
				
			||||||
from django.db.models.signals import post_save
 | 
					from django.db.models.signals import post_save
 | 
				
			||||||
from filelock import FileLock
 | 
					from filelock import FileLock
 | 
				
			||||||
from redis.exceptions import ConnectionError
 | 
					 | 
				
			||||||
from whoosh.writing import AsyncWriter
 | 
					from whoosh.writing import AsyncWriter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from documents import index
 | 
					from documents import index
 | 
				
			||||||
from documents import sanity_checker
 | 
					from documents import sanity_checker
 | 
				
			||||||
from documents.barcodes import BarcodeReader
 | 
					from documents.barcodes import BarcodePlugin
 | 
				
			||||||
from documents.classifier import DocumentClassifier
 | 
					from documents.classifier import DocumentClassifier
 | 
				
			||||||
from documents.classifier import load_classifier
 | 
					from documents.classifier import load_classifier
 | 
				
			||||||
from documents.consumer import Consumer
 | 
					from documents.consumer import Consumer
 | 
				
			||||||
from documents.consumer import ConsumerError
 | 
					from documents.consumer import ConsumerError
 | 
				
			||||||
 | 
					from documents.consumer import WorkflowTriggerPlugin
 | 
				
			||||||
from documents.data_models import ConsumableDocument
 | 
					from documents.data_models import ConsumableDocument
 | 
				
			||||||
from documents.data_models import DocumentMetadataOverrides
 | 
					from documents.data_models import DocumentMetadataOverrides
 | 
				
			||||||
from documents.double_sided import collate
 | 
					from documents.double_sided import CollatePlugin
 | 
				
			||||||
from documents.file_handling import create_source_path_directory
 | 
					from documents.file_handling import create_source_path_directory
 | 
				
			||||||
from documents.file_handling import generate_unique_filename
 | 
					from documents.file_handling import generate_unique_filename
 | 
				
			||||||
from documents.models import Correspondent
 | 
					from documents.models import Correspondent
 | 
				
			||||||
@ -35,6 +35,10 @@ from documents.models import StoragePath
 | 
				
			|||||||
from documents.models import Tag
 | 
					from documents.models import Tag
 | 
				
			||||||
from documents.parsers import DocumentParser
 | 
					from documents.parsers import DocumentParser
 | 
				
			||||||
from documents.parsers import get_parser_class_for_mime_type
 | 
					from documents.parsers import get_parser_class_for_mime_type
 | 
				
			||||||
 | 
					from documents.plugins.base import ConsumeTaskPlugin
 | 
				
			||||||
 | 
					from documents.plugins.base import ProgressManager
 | 
				
			||||||
 | 
					from documents.plugins.base import StopConsumeTaskError
 | 
				
			||||||
 | 
					from documents.plugins.helpers import ProgressStatusOptions
 | 
				
			||||||
from documents.sanity_checker import SanityCheckFailedException
 | 
					from documents.sanity_checker import SanityCheckFailedException
 | 
				
			||||||
from documents.signals import document_updated
 | 
					from documents.signals import document_updated
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -102,70 +106,60 @@ def consume_file(
 | 
				
			|||||||
    input_doc: ConsumableDocument,
 | 
					    input_doc: ConsumableDocument,
 | 
				
			||||||
    overrides: Optional[DocumentMetadataOverrides] = None,
 | 
					    overrides: Optional[DocumentMetadataOverrides] = None,
 | 
				
			||||||
):
 | 
					):
 | 
				
			||||||
    def send_progress(status="SUCCESS", message="finished"):
 | 
					 | 
				
			||||||
        payload = {
 | 
					 | 
				
			||||||
            "filename": overrides.filename or input_doc.original_file.name,
 | 
					 | 
				
			||||||
            "task_id": None,
 | 
					 | 
				
			||||||
            "current_progress": 100,
 | 
					 | 
				
			||||||
            "max_progress": 100,
 | 
					 | 
				
			||||||
            "status": status,
 | 
					 | 
				
			||||||
            "message": message,
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        try:
 | 
					 | 
				
			||||||
            async_to_sync(get_channel_layer().group_send)(
 | 
					 | 
				
			||||||
                "status_updates",
 | 
					 | 
				
			||||||
                {"type": "status_update", "data": payload},
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
        except ConnectionError as e:
 | 
					 | 
				
			||||||
            logger.warning(f"ConnectionError on status send: {e!s}")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    # Default no overrides
 | 
					    # Default no overrides
 | 
				
			||||||
    if overrides is None:
 | 
					    if overrides is None:
 | 
				
			||||||
        overrides = DocumentMetadataOverrides()
 | 
					        overrides = DocumentMetadataOverrides()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Handle collation of double-sided documents scanned in two parts
 | 
					    plugins: list[type[ConsumeTaskPlugin]] = [
 | 
				
			||||||
    if settings.CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED and (
 | 
					        CollatePlugin,
 | 
				
			||||||
        settings.CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME
 | 
					        BarcodePlugin,
 | 
				
			||||||
        in input_doc.original_file.parts
 | 
					        WorkflowTriggerPlugin,
 | 
				
			||||||
    ):
 | 
					    ]
 | 
				
			||||||
        try:
 | 
					 | 
				
			||||||
            msg = collate(input_doc)
 | 
					 | 
				
			||||||
            send_progress(message=msg)
 | 
					 | 
				
			||||||
            return msg
 | 
					 | 
				
			||||||
        except ConsumerError as e:
 | 
					 | 
				
			||||||
            send_progress(status="FAILURE", message=e.args[0])
 | 
					 | 
				
			||||||
            raise e
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # read all barcodes in the current document
 | 
					    with ProgressManager(
 | 
				
			||||||
    if settings.CONSUMER_ENABLE_BARCODES or settings.CONSUMER_ENABLE_ASN_BARCODE:
 | 
					        overrides.filename or input_doc.original_file.name,
 | 
				
			||||||
        with BarcodeReader(input_doc.original_file, input_doc.mime_type) as reader:
 | 
					        self.request.id,
 | 
				
			||||||
            if settings.CONSUMER_ENABLE_BARCODES and reader.separate(
 | 
					    ) as status_mgr, TemporaryDirectory(dir=settings.SCRATCH_DIR) as tmp_dir:
 | 
				
			||||||
                input_doc.source,
 | 
					        tmp_dir = Path(tmp_dir)
 | 
				
			||||||
 | 
					        for plugin_class in plugins:
 | 
				
			||||||
 | 
					            plugin_name = plugin_class.NAME
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            plugin = plugin_class(
 | 
				
			||||||
 | 
					                input_doc,
 | 
				
			||||||
                overrides,
 | 
					                overrides,
 | 
				
			||||||
            ):
 | 
					                status_mgr,
 | 
				
			||||||
                # notify the sender, otherwise the progress bar
 | 
					                tmp_dir,
 | 
				
			||||||
                # in the UI stays stuck
 | 
					                self.request.id,
 | 
				
			||||||
                send_progress()
 | 
					 | 
				
			||||||
                # consuming stops here, since the original document with
 | 
					 | 
				
			||||||
                # the barcodes has been split and will be consumed separately
 | 
					 | 
				
			||||||
                input_doc.original_file.unlink()
 | 
					 | 
				
			||||||
                return "File successfully split"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            # try reading the ASN from barcode
 | 
					 | 
				
			||||||
            if (
 | 
					 | 
				
			||||||
                settings.CONSUMER_ENABLE_ASN_BARCODE
 | 
					 | 
				
			||||||
                and (located_asn := reader.asn) is not None
 | 
					 | 
				
			||||||
            ):
 | 
					 | 
				
			||||||
                # Note this will take precedence over an API provided ASN
 | 
					 | 
				
			||||||
                # But it's from a physical barcode, so that's good
 | 
					 | 
				
			||||||
                overrides.asn = located_asn
 | 
					 | 
				
			||||||
                logger.info(f"Found ASN in barcode: {overrides.asn}")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    template_overrides = Consumer().get_workflow_overrides(
 | 
					 | 
				
			||||||
        input_doc=input_doc,
 | 
					 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    overrides.update(template_overrides)
 | 
					            if not plugin.able_to_run:
 | 
				
			||||||
 | 
					                logger.debug(f"Skipping plugin {plugin_name}")
 | 
				
			||||||
 | 
					                continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            try:
 | 
				
			||||||
 | 
					                logger.debug(f"Executing plugin {plugin_name}")
 | 
				
			||||||
 | 
					                plugin.setup()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                msg = plugin.run()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                if msg is not None:
 | 
				
			||||||
 | 
					                    logger.info(f"{plugin_name} completed with: {msg}")
 | 
				
			||||||
 | 
					                else:
 | 
				
			||||||
 | 
					                    logger.info(f"{plugin_name} completed with no message")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                overrides = plugin.metadata
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            except StopConsumeTaskError as e:
 | 
				
			||||||
 | 
					                logger.info(f"{plugin_name} requested task exit: {e.message}")
 | 
				
			||||||
 | 
					                return e.message
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            except Exception as e:
 | 
				
			||||||
 | 
					                logger.exception(f"{plugin_name} failed: {e}")
 | 
				
			||||||
 | 
					                status_mgr.send_progress(ProgressStatusOptions.FAILED, f"{e}", 100, 100)
 | 
				
			||||||
 | 
					                raise
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            finally:
 | 
				
			||||||
 | 
					                plugin.cleanup()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # continue with consumption if no barcode was found
 | 
					    # continue with consumption if no barcode was found
 | 
				
			||||||
    document = Consumer().try_consume_file(
 | 
					    document = Consumer().try_consume_file(
 | 
				
			||||||
 | 
				
			|||||||
@ -1,4 +1,7 @@
 | 
				
			|||||||
import shutil
 | 
					import shutil
 | 
				
			||||||
 | 
					from collections.abc import Generator
 | 
				
			||||||
 | 
					from contextlib import contextmanager
 | 
				
			||||||
 | 
					from pathlib import Path
 | 
				
			||||||
from unittest import mock
 | 
					from unittest import mock
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import pytest
 | 
					import pytest
 | 
				
			||||||
@ -7,14 +10,13 @@ from django.test import TestCase
 | 
				
			|||||||
from django.test import override_settings
 | 
					from django.test import override_settings
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from documents import tasks
 | 
					from documents import tasks
 | 
				
			||||||
from documents.barcodes import BarcodeReader
 | 
					from documents.barcodes import BarcodePlugin
 | 
				
			||||||
from documents.consumer import ConsumerError
 | 
					 | 
				
			||||||
from documents.data_models import ConsumableDocument
 | 
					from documents.data_models import ConsumableDocument
 | 
				
			||||||
from documents.data_models import DocumentMetadataOverrides
 | 
					from documents.data_models import DocumentMetadataOverrides
 | 
				
			||||||
from documents.data_models import DocumentSource
 | 
					from documents.data_models import DocumentSource
 | 
				
			||||||
from documents.models import Document
 | 
					 | 
				
			||||||
from documents.tests.utils import DirectoriesMixin
 | 
					from documents.tests.utils import DirectoriesMixin
 | 
				
			||||||
from documents.tests.utils import DocumentConsumeDelayMixin
 | 
					from documents.tests.utils import DocumentConsumeDelayMixin
 | 
				
			||||||
 | 
					from documents.tests.utils import DummyProgressManager
 | 
				
			||||||
from documents.tests.utils import FileSystemAssertsMixin
 | 
					from documents.tests.utils import FileSystemAssertsMixin
 | 
				
			||||||
from documents.tests.utils import SampleDirMixin
 | 
					from documents.tests.utils import SampleDirMixin
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -26,8 +28,29 @@ except ImportError:
 | 
				
			|||||||
    HAS_ZXING_LIB = False
 | 
					    HAS_ZXING_LIB = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class GetReaderPluginMixin:
					    """
					    Test mixin providing a context manager that yields a set-up BarcodePlugin.

					    NOTE(review): reads ``self.dirs.scratch_dir``, so it presumably must be
					    combined with a mixin that populates ``self.dirs`` - confirm.
					    """

					    @contextmanager
					    def get_reader(self, filepath: Path) -> Generator[BarcodePlugin, None, None]:
					        """
					        Yield a BarcodePlugin for ``filepath`` on which ``setup()`` has been called.

					        The plugin is built from a consume-folder ConsumableDocument, empty
					        metadata overrides, a DummyProgressManager named after the file, the
					        test scratch directory and a placeholder task id.

					        ``cleanup()`` is always invoked when the ``with`` block exits, including
					        when the block raises (e.g. a failing assertion), so plugin resources
					        are not left behind by a failed test.
					        """
					        reader = BarcodePlugin(
					            ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath),
					            DocumentMetadataOverrides(),
					            DummyProgressManager(filepath.name, None),
					            self.dirs.scratch_dir,
					            "task-id",
					        )
					        reader.setup()
					        try:
					            yield reader
					        finally:
					            # Without try/finally an exception thrown into the generator at
					            # the yield point would skip the cleanup call entirely.
					            reader.cleanup()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
 | 
					@override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
 | 
				
			||||||
class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, TestCase):
 | 
					class TestBarcode(
 | 
				
			||||||
 | 
					    DirectoriesMixin,
 | 
				
			||||||
 | 
					    FileSystemAssertsMixin,
 | 
				
			||||||
 | 
					    SampleDirMixin,
 | 
				
			||||||
 | 
					    GetReaderPluginMixin,
 | 
				
			||||||
 | 
					    TestCase,
 | 
				
			||||||
 | 
					):
 | 
				
			||||||
    def test_scan_file_for_separating_barcodes(self):
 | 
					    def test_scan_file_for_separating_barcodes(self):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        GIVEN:
 | 
					        GIVEN:
 | 
				
			||||||
@ -39,7 +62,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -60,7 +83,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.tiff"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "image/tiff") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -80,7 +103,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-alpha.tiff"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-alpha.tiff"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "image/tiff") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -97,7 +120,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
            - No pages to split on
 | 
					            - No pages to split on
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
					        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -115,7 +138,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -133,7 +156,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "several-patcht-codes.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -158,7 +181,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        ]:
 | 
					        ]:
 | 
				
			||||||
            test_file = self.BARCODE_SAMPLE_DIR / test_file
 | 
					            test_file = self.BARCODE_SAMPLE_DIR / test_file
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					            with self.get_reader(test_file) as reader:
 | 
				
			||||||
                reader.detect()
 | 
					                reader.detect()
 | 
				
			||||||
                separator_page_numbers = reader.get_separation_pages()
 | 
					                separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -177,7 +200,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-unreadable.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle-unreadable.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -195,7 +218,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "barcode-fax-image.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -214,7 +237,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-qr.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -234,7 +257,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -255,7 +278,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-custom.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -276,7 +299,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "barcode-128-custom.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -296,7 +319,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-custom.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -315,7 +338,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "many-qr-codes.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -334,7 +357,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "password-is-test.pdf"
 | 
					        test_file = self.SAMPLE_DIR / "password-is-test.pdf"
 | 
				
			||||||
        with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
 | 
					        with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
 | 
				
			||||||
            with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					            with self.get_reader(test_file) as reader:
 | 
				
			||||||
                reader.detect()
 | 
					                reader.detect()
 | 
				
			||||||
                warning = cm.output[0]
 | 
					                warning = cm.output[0]
 | 
				
			||||||
                expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes"
 | 
					                expected_str = "WARNING:paperless.barcodes:File is likely password protected, not checking for barcodes"
 | 
				
			||||||
@ -356,7 +379,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-middle.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            documents = reader.separate_pages({1: False})
 | 
					            documents = reader.separate_pages({1: False})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            self.assertEqual(reader.pdf_file, test_file)
 | 
					            self.assertEqual(reader.pdf_file, test_file)
 | 
				
			||||||
@ -373,7 +396,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t-double.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            documents = reader.separate_pages({1: False, 2: False})
 | 
					            documents = reader.separate_pages({1: False, 2: False})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            self.assertEqual(len(documents), 2)
 | 
					            self.assertEqual(len(documents), 2)
 | 
				
			||||||
@ -385,32 +408,18 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        WHEN:
 | 
					        WHEN:
 | 
				
			||||||
            - No separation pages are provided
 | 
					            - No separation pages are provided
 | 
				
			||||||
        THEN:
 | 
					        THEN:
 | 
				
			||||||
            - No new documents are produced
 | 
					            - Nothing happens
 | 
				
			||||||
            - A warning is logged
 | 
					 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
					        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					            self.assertEqual("No pages to split on!", reader.run())
 | 
				
			||||||
                self.assertFalse(
 | 
					 | 
				
			||||||
                    reader.separate(
 | 
					 | 
				
			||||||
                        DocumentSource.ApiUpload,
 | 
					 | 
				
			||||||
                        DocumentMetadataOverrides(),
 | 
					 | 
				
			||||||
                    ),
 | 
					 | 
				
			||||||
                )
 | 
					 | 
				
			||||||
                self.assertEqual(
 | 
					 | 
				
			||||||
                    cm.output,
 | 
					 | 
				
			||||||
                    [
 | 
					 | 
				
			||||||
                        "WARNING:paperless.barcodes:No pages to split on!",
 | 
					 | 
				
			||||||
                    ],
 | 
					 | 
				
			||||||
                )
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @override_settings(
 | 
					    @override_settings(
 | 
				
			||||||
        CONSUMER_ENABLE_BARCODES=True,
 | 
					        CONSUMER_ENABLE_BARCODES=True,
 | 
				
			||||||
        CONSUMER_BARCODE_TIFF_SUPPORT=True,
 | 
					        CONSUMER_BARCODE_TIFF_SUPPORT=True,
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    @mock.patch("documents.consumer.Consumer.try_consume_file")
 | 
					    def test_consume_barcode_unsupported_jpg_file(self):
 | 
				
			||||||
    def test_consume_barcode_unsupported_jpg_file(self, m):
 | 
					 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        GIVEN:
 | 
					        GIVEN:
 | 
				
			||||||
            - JPEG image as input
 | 
					            - JPEG image as input
 | 
				
			||||||
@ -422,35 +431,8 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "simple.jpg"
 | 
					        test_file = self.SAMPLE_DIR / "simple.jpg"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        dst = settings.SCRATCH_DIR / "simple.jpg"
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
        shutil.copy(test_file, dst)
 | 
					            self.assertFalse(reader.able_to_run)
 | 
				
			||||||
 | 
					 | 
				
			||||||
        with self.assertLogs("paperless.barcodes", level="WARNING") as cm:
 | 
					 | 
				
			||||||
            self.assertIn(
 | 
					 | 
				
			||||||
                "Success",
 | 
					 | 
				
			||||||
                tasks.consume_file(
 | 
					 | 
				
			||||||
                    ConsumableDocument(
 | 
					 | 
				
			||||||
                        source=DocumentSource.ConsumeFolder,
 | 
					 | 
				
			||||||
                        original_file=dst,
 | 
					 | 
				
			||||||
                    ),
 | 
					 | 
				
			||||||
                    None,
 | 
					 | 
				
			||||||
                ),
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        self.assertListEqual(
 | 
					 | 
				
			||||||
            cm.output,
 | 
					 | 
				
			||||||
            [
 | 
					 | 
				
			||||||
                "WARNING:paperless.barcodes:Unsupported file format for barcode reader: image/jpeg",
 | 
					 | 
				
			||||||
            ],
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
        m.assert_called_once()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        args, kwargs = m.call_args
 | 
					 | 
				
			||||||
        self.assertIsNone(kwargs["override_filename"])
 | 
					 | 
				
			||||||
        self.assertIsNone(kwargs["override_title"])
 | 
					 | 
				
			||||||
        self.assertIsNone(kwargs["override_correspondent_id"])
 | 
					 | 
				
			||||||
        self.assertIsNone(kwargs["override_document_type_id"])
 | 
					 | 
				
			||||||
        self.assertIsNone(kwargs["override_tag_ids"])
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @override_settings(
 | 
					    @override_settings(
 | 
				
			||||||
        CONSUMER_ENABLE_BARCODES=True,
 | 
					        CONSUMER_ENABLE_BARCODES=True,
 | 
				
			||||||
@ -467,7 +449,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-2.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -504,7 +486,7 @@ class TestBarcode(DirectoriesMixin, FileSystemAssertsMixin, SampleDirMixin, Test
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "split-by-asn-1.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            separator_page_numbers = reader.get_separation_pages()
 | 
					            separator_page_numbers = reader.get_separation_pages()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -550,7 +532,7 @@ class TestBarcodeNewConsume(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        overrides = DocumentMetadataOverrides(tag_ids=[1, 2, 9])
 | 
					        overrides = DocumentMetadataOverrides(tag_ids=[1, 2, 9])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with mock.patch("documents.tasks.async_to_sync") as progress_mocker:
 | 
					        with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
 | 
				
			||||||
            self.assertEqual(
 | 
					            self.assertEqual(
 | 
				
			||||||
                tasks.consume_file(
 | 
					                tasks.consume_file(
 | 
				
			||||||
                    ConsumableDocument(
 | 
					                    ConsumableDocument(
 | 
				
			||||||
@ -559,10 +541,8 @@ class TestBarcodeNewConsume(
 | 
				
			|||||||
                    ),
 | 
					                    ),
 | 
				
			||||||
                    overrides,
 | 
					                    overrides,
 | 
				
			||||||
                ),
 | 
					                ),
 | 
				
			||||||
                "File successfully split",
 | 
					                "Barcode splitting complete!",
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
            # We let the consumer know progress is done
 | 
					 | 
				
			||||||
            progress_mocker.assert_called_once()
 | 
					 | 
				
			||||||
            # 2 new document consume tasks created
 | 
					            # 2 new document consume tasks created
 | 
				
			||||||
            self.assertEqual(self.consume_file_mock.call_count, 2)
 | 
					            self.assertEqual(self.consume_file_mock.call_count, 2)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -580,7 +560,20 @@ class TestBarcodeNewConsume(
 | 
				
			|||||||
                self.assertEqual(overrides, new_doc_overrides)
 | 
					                self.assertEqual(overrides, new_doc_overrides)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
 | 
					class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, GetReaderPluginMixin, TestCase):
 | 
				
			||||||
 | 
					    @contextmanager
 | 
				
			||||||
 | 
					    def get_reader(self, filepath: Path) -> BarcodePlugin:
 | 
				
			||||||
 | 
					        reader = BarcodePlugin(
 | 
				
			||||||
 | 
					            ConsumableDocument(DocumentSource.ConsumeFolder, original_file=filepath),
 | 
				
			||||||
 | 
					            DocumentMetadataOverrides(),
 | 
				
			||||||
 | 
					            DummyProgressManager(filepath.name, None),
 | 
				
			||||||
 | 
					            self.dirs.scratch_dir,
 | 
				
			||||||
 | 
					            "task-id",
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        reader.setup()
 | 
				
			||||||
 | 
					        yield reader
 | 
				
			||||||
 | 
					        reader.cleanup()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-")
 | 
					    @override_settings(CONSUMER_ASN_BARCODE_PREFIX="CUSTOM-PREFIX-")
 | 
				
			||||||
    def test_scan_file_for_asn_custom_prefix(self):
 | 
					    def test_scan_file_for_asn_custom_prefix(self):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
@ -594,7 +587,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
 | 
				
			|||||||
            - The ASN integer value is correct
 | 
					            - The ASN integer value is correct
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-custom-prefix.pdf"
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            asn = reader.asn
 | 
					            asn = reader.asn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            self.assertEqual(reader.pdf_file, test_file)
 | 
					            self.assertEqual(reader.pdf_file, test_file)
 | 
				
			||||||
@ -613,7 +606,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            asn = reader.asn
 | 
					            asn = reader.asn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            self.assertEqual(reader.pdf_file, test_file)
 | 
					            self.assertEqual(reader.pdf_file, test_file)
 | 
				
			||||||
@ -630,55 +623,12 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "patch-code-t.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            asn = reader.asn
 | 
					            asn = reader.asn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            self.assertEqual(reader.pdf_file, test_file)
 | 
					            self.assertEqual(reader.pdf_file, test_file)
 | 
				
			||||||
            self.assertEqual(asn, None)
 | 
					            self.assertEqual(asn, None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
 | 
					 | 
				
			||||||
    def test_scan_file_for_asn_already_exists(self):
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        GIVEN:
 | 
					 | 
				
			||||||
            - PDF with an ASN barcode
 | 
					 | 
				
			||||||
            - ASN value already exists
 | 
					 | 
				
			||||||
        WHEN:
 | 
					 | 
				
			||||||
            - File is scanned for barcodes
 | 
					 | 
				
			||||||
        THEN:
 | 
					 | 
				
			||||||
            - ASN is retrieved from the document
 | 
					 | 
				
			||||||
            - Consumption fails
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        Document.objects.create(
 | 
					 | 
				
			||||||
            title="WOW",
 | 
					 | 
				
			||||||
            content="the content",
 | 
					 | 
				
			||||||
            archive_serial_number=123,
 | 
					 | 
				
			||||||
            checksum="456",
 | 
					 | 
				
			||||||
            mime_type="application/pdf",
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-123.pdf"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf"
 | 
					 | 
				
			||||||
        shutil.copy(test_file, dst)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        with mock.patch("documents.consumer.Consumer._send_progress"):
 | 
					 | 
				
			||||||
            with self.assertRaises(ConsumerError) as cm, self.assertLogs(
 | 
					 | 
				
			||||||
                "paperless.consumer",
 | 
					 | 
				
			||||||
                level="ERROR",
 | 
					 | 
				
			||||||
            ) as logs_cm:
 | 
					 | 
				
			||||||
                tasks.consume_file(
 | 
					 | 
				
			||||||
                    ConsumableDocument(
 | 
					 | 
				
			||||||
                        source=DocumentSource.ConsumeFolder,
 | 
					 | 
				
			||||||
                        original_file=dst,
 | 
					 | 
				
			||||||
                    ),
 | 
					 | 
				
			||||||
                    None,
 | 
					 | 
				
			||||||
                )
 | 
					 | 
				
			||||||
            self.assertIn("Not consuming barcode-39-asn-123.pdf", str(cm.exception))
 | 
					 | 
				
			||||||
            error_str = logs_cm.output[0]
 | 
					 | 
				
			||||||
            expected_str = "ERROR:paperless.consumer:Not consuming barcode-39-asn-123.pdf: Given ASN already exists!"
 | 
					 | 
				
			||||||
            self.assertEqual(expected_str, error_str)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def test_scan_file_for_asn_barcode_invalid(self):
 | 
					    def test_scan_file_for_asn_barcode_invalid(self):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        GIVEN:
 | 
					        GIVEN:
 | 
				
			||||||
@ -692,7 +642,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "barcode-39-asn-invalid.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            asn = reader.asn
 | 
					            asn = reader.asn
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            self.assertEqual(reader.pdf_file, test_file)
 | 
					            self.assertEqual(reader.pdf_file, test_file)
 | 
				
			||||||
@ -718,7 +668,9 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
 | 
				
			|||||||
        dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf"
 | 
					        dst = settings.SCRATCH_DIR / "barcode-39-asn-123.pdf"
 | 
				
			||||||
        shutil.copy(test_file, dst)
 | 
					        shutil.copy(test_file, dst)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with mock.patch("documents.consumer.Consumer.try_consume_file") as mocked_call:
 | 
					        with mock.patch(
 | 
				
			||||||
 | 
					            "documents.consumer.Consumer.try_consume_file",
 | 
				
			||||||
 | 
					        ) as mocked_consumer:
 | 
				
			||||||
            tasks.consume_file(
 | 
					            tasks.consume_file(
 | 
				
			||||||
                ConsumableDocument(
 | 
					                ConsumableDocument(
 | 
				
			||||||
                    source=DocumentSource.ConsumeFolder,
 | 
					                    source=DocumentSource.ConsumeFolder,
 | 
				
			||||||
@ -726,40 +678,11 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
 | 
				
			|||||||
                ),
 | 
					                ),
 | 
				
			||||||
                None,
 | 
					                None,
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
 | 
					            mocked_consumer.assert_called_once()
 | 
				
			||||||
            args, kwargs = mocked_call.call_args
 | 
					            args, kwargs = mocked_consumer.call_args
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            self.assertEqual(kwargs["override_asn"], 123)
 | 
					            self.assertEqual(kwargs["override_asn"], 123)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @override_settings(CONSUMER_ENABLE_ASN_BARCODE=True)
 | 
					 | 
				
			||||||
    def test_asn_too_large(self):
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        GIVEN:
 | 
					 | 
				
			||||||
            - ASN from barcode enabled
 | 
					 | 
				
			||||||
            - Barcode contains too large an ASN value
 | 
					 | 
				
			||||||
        WHEN:
 | 
					 | 
				
			||||||
            - ASN from barcode checked for correctness
 | 
					 | 
				
			||||||
        THEN:
 | 
					 | 
				
			||||||
            - Exception is raised regarding size limits
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        src = self.BARCODE_SAMPLE_DIR / "barcode-128-asn-too-large.pdf"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        dst = self.dirs.scratch_dir / "barcode-128-asn-too-large.pdf"
 | 
					 | 
				
			||||||
        shutil.copy(src, dst)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        input_doc = ConsumableDocument(
 | 
					 | 
				
			||||||
            source=DocumentSource.ConsumeFolder,
 | 
					 | 
				
			||||||
            original_file=dst,
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        with mock.patch("documents.consumer.Consumer._send_progress"):
 | 
					 | 
				
			||||||
            self.assertRaisesMessage(
 | 
					 | 
				
			||||||
                ConsumerError,
 | 
					 | 
				
			||||||
                "Given ASN 4294967296 is out of range [0, 4,294,967,295]",
 | 
					 | 
				
			||||||
                tasks.consume_file,
 | 
					 | 
				
			||||||
                input_doc,
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
 | 
					    @override_settings(CONSUMER_BARCODE_SCANNER="PYZBAR")
 | 
				
			||||||
    def test_scan_file_for_qrcode_without_upscale(self):
 | 
					    def test_scan_file_for_qrcode_without_upscale(self):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
@ -774,7 +697,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            self.assertEqual(len(reader.barcodes), 0)
 | 
					            self.assertEqual(len(reader.barcodes), 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -796,7 +719,7 @@ class TestAsnBarcode(DirectoriesMixin, SampleDirMixin, TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
 | 
					        test_file = self.BARCODE_SAMPLE_DIR / "barcode-qr-asn-000123-upscale-dpi.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with BarcodeReader(test_file, "application/pdf") as reader:
 | 
					        with self.get_reader(test_file) as reader:
 | 
				
			||||||
            reader.detect()
 | 
					            reader.detect()
 | 
				
			||||||
            self.assertEqual(len(reader.barcodes), 1)
 | 
					            self.assertEqual(len(reader.barcodes), 1)
 | 
				
			||||||
            self.assertEqual(reader.asn, 123)
 | 
					            self.assertEqual(reader.asn, 123)
 | 
				
			||||||
 | 
				
			|||||||
@ -17,6 +17,7 @@ from documents.data_models import DocumentSource
 | 
				
			|||||||
from documents.double_sided import STAGING_FILE_NAME
 | 
					from documents.double_sided import STAGING_FILE_NAME
 | 
				
			||||||
from documents.double_sided import TIMEOUT_MINUTES
 | 
					from documents.double_sided import TIMEOUT_MINUTES
 | 
				
			||||||
from documents.tests.utils import DirectoriesMixin
 | 
					from documents.tests.utils import DirectoriesMixin
 | 
				
			||||||
 | 
					from documents.tests.utils import DummyProgressManager
 | 
				
			||||||
from documents.tests.utils import FileSystemAssertsMixin
 | 
					from documents.tests.utils import FileSystemAssertsMixin
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -42,9 +43,10 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
 | 
				
			|||||||
        dst = self.dirs.double_sided_dir / dstname
 | 
					        dst = self.dirs.double_sided_dir / dstname
 | 
				
			||||||
        dst.parent.mkdir(parents=True, exist_ok=True)
 | 
					        dst.parent.mkdir(parents=True, exist_ok=True)
 | 
				
			||||||
        shutil.copy(src, dst)
 | 
					        shutil.copy(src, dst)
 | 
				
			||||||
        with mock.patch("documents.tasks.async_to_sync"), mock.patch(
 | 
					        with mock.patch(
 | 
				
			||||||
            "documents.consumer.async_to_sync",
 | 
					            "documents.tasks.ProgressManager",
 | 
				
			||||||
        ):
 | 
					            DummyProgressManager,
 | 
				
			||||||
 | 
					        ), mock.patch("documents.consumer.async_to_sync"):
 | 
				
			||||||
            msg = tasks.consume_file(
 | 
					            msg = tasks.consume_file(
 | 
				
			||||||
                ConsumableDocument(
 | 
					                ConsumableDocument(
 | 
				
			||||||
                    source=DocumentSource.ConsumeFolder,
 | 
					                    source=DocumentSource.ConsumeFolder,
 | 
				
			||||||
@ -211,7 +213,7 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        msg = self.consume_file("simple.pdf", Path("..") / "simple.pdf")
 | 
					        msg = self.consume_file("simple.pdf", Path("..") / "simple.pdf")
 | 
				
			||||||
        self.assertIsNotFile(self.staging_file)
 | 
					        self.assertIsNotFile(self.staging_file)
 | 
				
			||||||
        self.assertRegex(msg, "Success. New document .* created")
 | 
					        self.assertRegex(msg, r"Success. New document id \d+ created")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_subdirectory_upload(self):
 | 
					    def test_subdirectory_upload(self):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
@ -250,4 +252,4 @@ class TestDoubleSided(DirectoriesMixin, FileSystemAssertsMixin, TestCase):
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        msg = self.consume_file("simple.pdf")
 | 
					        msg = self.consume_file("simple.pdf")
 | 
				
			||||||
        self.assertIsNotFile(self.staging_file)
 | 
					        self.assertIsNotFile(self.staging_file)
 | 
				
			||||||
        self.assertRegex(msg, "Success. New document .* created")
 | 
					        self.assertRegex(msg, r"Success. New document id \d+ created")
 | 
				
			||||||
 | 
				
			|||||||
@ -24,6 +24,7 @@ from documents.models import WorkflowAction
 | 
				
			|||||||
from documents.models import WorkflowTrigger
 | 
					from documents.models import WorkflowTrigger
 | 
				
			||||||
from documents.signals import document_consumption_finished
 | 
					from documents.signals import document_consumption_finished
 | 
				
			||||||
from documents.tests.utils import DirectoriesMixin
 | 
					from documents.tests.utils import DirectoriesMixin
 | 
				
			||||||
 | 
					from documents.tests.utils import DummyProgressManager
 | 
				
			||||||
from documents.tests.utils import FileSystemAssertsMixin
 | 
					from documents.tests.utils import FileSystemAssertsMixin
 | 
				
			||||||
from paperless_mail.models import MailAccount
 | 
					from paperless_mail.models import MailAccount
 | 
				
			||||||
from paperless_mail.models import MailRule
 | 
					from paperless_mail.models import MailRule
 | 
				
			||||||
@ -126,7 +127,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
					        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with mock.patch("documents.tasks.async_to_sync"):
 | 
					        with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
 | 
				
			||||||
            with self.assertLogs("paperless.matching", level="INFO") as cm:
 | 
					            with self.assertLogs("paperless.matching", level="INFO") as cm:
 | 
				
			||||||
                tasks.consume_file(
 | 
					                tasks.consume_file(
 | 
				
			||||||
                    ConsumableDocument(
 | 
					                    ConsumableDocument(
 | 
				
			||||||
@ -203,7 +204,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
 | 
				
			|||||||
        w.save()
 | 
					        w.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
					        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
				
			||||||
        with mock.patch("documents.tasks.async_to_sync"):
 | 
					        with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
 | 
				
			||||||
            with self.assertLogs("paperless.matching", level="INFO") as cm:
 | 
					            with self.assertLogs("paperless.matching", level="INFO") as cm:
 | 
				
			||||||
                tasks.consume_file(
 | 
					                tasks.consume_file(
 | 
				
			||||||
                    ConsumableDocument(
 | 
					                    ConsumableDocument(
 | 
				
			||||||
@ -294,7 +295,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
					        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with mock.patch("documents.tasks.async_to_sync"):
 | 
					        with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
 | 
				
			||||||
            with self.assertLogs("paperless.matching", level="INFO") as cm:
 | 
					            with self.assertLogs("paperless.matching", level="INFO") as cm:
 | 
				
			||||||
                tasks.consume_file(
 | 
					                tasks.consume_file(
 | 
				
			||||||
                    ConsumableDocument(
 | 
					                    ConsumableDocument(
 | 
				
			||||||
@ -356,7 +357,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
					        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with mock.patch("documents.tasks.async_to_sync"):
 | 
					        with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
 | 
				
			||||||
            with self.assertLogs("paperless.matching", level="DEBUG") as cm:
 | 
					            with self.assertLogs("paperless.matching", level="DEBUG") as cm:
 | 
				
			||||||
                tasks.consume_file(
 | 
					                tasks.consume_file(
 | 
				
			||||||
                    ConsumableDocument(
 | 
					                    ConsumableDocument(
 | 
				
			||||||
@ -407,7 +408,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
					        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with mock.patch("documents.tasks.async_to_sync"):
 | 
					        with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
 | 
				
			||||||
            with self.assertLogs("paperless.matching", level="DEBUG") as cm:
 | 
					            with self.assertLogs("paperless.matching", level="DEBUG") as cm:
 | 
				
			||||||
                tasks.consume_file(
 | 
					                tasks.consume_file(
 | 
				
			||||||
                    ConsumableDocument(
 | 
					                    ConsumableDocument(
 | 
				
			||||||
@ -468,7 +469,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
					        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with mock.patch("documents.tasks.async_to_sync"):
 | 
					        with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
 | 
				
			||||||
            with self.assertLogs("paperless.matching", level="DEBUG") as cm:
 | 
					            with self.assertLogs("paperless.matching", level="DEBUG") as cm:
 | 
				
			||||||
                tasks.consume_file(
 | 
					                tasks.consume_file(
 | 
				
			||||||
                    ConsumableDocument(
 | 
					                    ConsumableDocument(
 | 
				
			||||||
@ -529,7 +530,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
					        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with mock.patch("documents.tasks.async_to_sync"):
 | 
					        with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
 | 
				
			||||||
            with self.assertLogs("paperless.matching", level="DEBUG") as cm:
 | 
					            with self.assertLogs("paperless.matching", level="DEBUG") as cm:
 | 
				
			||||||
                tasks.consume_file(
 | 
					                tasks.consume_file(
 | 
				
			||||||
                    ConsumableDocument(
 | 
					                    ConsumableDocument(
 | 
				
			||||||
@ -591,7 +592,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
					        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with mock.patch("documents.tasks.async_to_sync"):
 | 
					        with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
 | 
				
			||||||
            with self.assertLogs("paperless.matching", level="DEBUG") as cm:
 | 
					            with self.assertLogs("paperless.matching", level="DEBUG") as cm:
 | 
				
			||||||
                tasks.consume_file(
 | 
					                tasks.consume_file(
 | 
				
			||||||
                    ConsumableDocument(
 | 
					                    ConsumableDocument(
 | 
				
			||||||
@ -686,7 +687,7 @@ class TestWorkflows(DirectoriesMixin, FileSystemAssertsMixin, APITestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
					        test_file = self.SAMPLE_DIR / "simple.pdf"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with mock.patch("documents.tasks.async_to_sync"):
 | 
					        with mock.patch("documents.tasks.ProgressManager", DummyProgressManager):
 | 
				
			||||||
            with self.assertLogs("paperless.matching", level="INFO") as cm:
 | 
					            with self.assertLogs("paperless.matching", level="INFO") as cm:
 | 
				
			||||||
                tasks.consume_file(
 | 
					                tasks.consume_file(
 | 
				
			||||||
                    ConsumableDocument(
 | 
					                    ConsumableDocument(
 | 
				
			||||||
 | 
				
			|||||||
@ -9,6 +9,7 @@ from os import PathLike
 | 
				
			|||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
from typing import Any
 | 
					from typing import Any
 | 
				
			||||||
from typing import Callable
 | 
					from typing import Callable
 | 
				
			||||||
 | 
					from typing import Optional
 | 
				
			||||||
from typing import Union
 | 
					from typing import Union
 | 
				
			||||||
from unittest import mock
 | 
					from unittest import mock
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -23,6 +24,7 @@ from django.test import override_settings
 | 
				
			|||||||
from documents.data_models import ConsumableDocument
 | 
					from documents.data_models import ConsumableDocument
 | 
				
			||||||
from documents.data_models import DocumentMetadataOverrides
 | 
					from documents.data_models import DocumentMetadataOverrides
 | 
				
			||||||
from documents.parsers import ParseError
 | 
					from documents.parsers import ParseError
 | 
				
			||||||
 | 
					from documents.plugins.helpers import ProgressStatusOptions
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def setup_directories():
 | 
					def setup_directories():
 | 
				
			||||||
@ -146,6 +148,11 @@ def util_call_with_backoff(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class DirectoriesMixin:
 | 
					class DirectoriesMixin:
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    Creates and overrides settings for all folders and paths, then ensures
 | 
				
			||||||
 | 
					    they are cleaned up on exit
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, *args, **kwargs):
 | 
					    def __init__(self, *args, **kwargs):
 | 
				
			||||||
        super().__init__(*args, **kwargs)
 | 
					        super().__init__(*args, **kwargs)
 | 
				
			||||||
        self.dirs = None
 | 
					        self.dirs = None
 | 
				
			||||||
@ -160,6 +167,10 @@ class DirectoriesMixin:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class FileSystemAssertsMixin:
 | 
					class FileSystemAssertsMixin:
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    Utilities for checks various state information of the file system
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def assertIsFile(self, path: Union[PathLike, str]):
 | 
					    def assertIsFile(self, path: Union[PathLike, str]):
 | 
				
			||||||
        self.assertTrue(Path(path).resolve().is_file(), f"File does not exist: {path}")
 | 
					        self.assertTrue(Path(path).resolve().is_file(), f"File does not exist: {path}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -188,6 +199,11 @@ class FileSystemAssertsMixin:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class ConsumerProgressMixin:
 | 
					class ConsumerProgressMixin:
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    Mocks the Consumer _send_progress, preventing attempts to connect to Redis
 | 
				
			||||||
 | 
					    and allowing access to its calls for verification
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def setUp(self) -> None:
 | 
					    def setUp(self) -> None:
 | 
				
			||||||
        self.send_progress_patcher = mock.patch(
 | 
					        self.send_progress_patcher = mock.patch(
 | 
				
			||||||
            "documents.consumer.Consumer._send_progress",
 | 
					            "documents.consumer.Consumer._send_progress",
 | 
				
			||||||
@ -310,3 +326,59 @@ class SampleDirMixin:
 | 
				
			|||||||
    SAMPLE_DIR = Path(__file__).parent / "samples"
 | 
					    SAMPLE_DIR = Path(__file__).parent / "samples"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
 | 
					    BARCODE_SAMPLE_DIR = SAMPLE_DIR / "barcodes"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class DummyProgressManager:
					    """
					    A dummy handler for progress management that doesn't actually try to
					    connect to Redis.  Payloads are stored for test assertions if needed.

					    Use it with
					      mock.patch("documents.tasks.ProgressManager", DummyProgressManager)
					    """

					    def __init__(self, filename: str, task_id: Optional[str] = None) -> None:
					        """
					        Remember the file name and task id echoed back in every payload and
					        start with an empty list of recorded payloads.
					        """
					        self.filename = filename
					        self.task_id = task_id
					        # Every payload built by send_progress, in call order
					        self.payloads: list[dict] = []

					    def __enter__(self) -> "DummyProgressManager":
					        """
					        Allows usage as a context manager; returns this same instance.
					        """
					        self.open()
					        return self

					    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
					        """
					        Closes the manager.  Returns None, so an exception raised inside
					        the with block is not suppressed.
					        """
					        self.close()

					    def open(self) -> None:
					        """
					        Does nothing, there is no connection to open.
					        """

					    def close(self) -> None:
					        """
					        Does nothing, there is no connection to close.
					        """

					    def send_progress(
					        self,
					        status: ProgressStatusOptions,
					        message: str,
					        current_progress: int,
					        max_progress: int,
					        extra_args: Optional[dict[str, Union[str, int]]] = None,
					    ) -> None:
					        """
					        Builds a "status_update" payload from the arguments and stores it in
					        self.payloads instead of sending it anywhere.

					        Any extra_args are merged into the payload's "data" dict and
					        overwrite a standard key of the same name.
					        """
					        # Ensure the layer is open
					        self.open()

					        payload = {
					            "type": "status_update",
					            "data": {
					                "filename": self.filename,
					                "task_id": self.task_id,
					                "current_progress": current_progress,
					                "max_progress": max_progress,
					                "status": status,
					                "message": message,
					            },
					        }
					        if extra_args is not None:
					            payload["data"].update(extra_args)

					        self.payloads.append(payload)
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user