mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 19:17:13 -05:00 
			
		
		
		
	Entirely removes optipng and updates the Ghostscript fallback to also produce WebP. Updates the thumbnail conversion to use a multiprocessing pool.
This commit is contained in:
		
							parent
							
								
									7d9a9033f9
								
							
						
					
					
						commit
						e8868d7ebf
					
				
							
								
								
									
										2
									
								
								.github/workflows/reusable-ci-backend.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/reusable-ci-backend.yml
									
									
									
									
										vendored
									
									
								
							@ -74,7 +74,7 @@ jobs:
 | 
				
			|||||||
        name: Install system dependencies
 | 
					        name: Install system dependencies
 | 
				
			||||||
        run: |
 | 
					        run: |
 | 
				
			||||||
          sudo apt-get update -qq
 | 
					          sudo apt-get update -qq
 | 
				
			||||||
          sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript optipng libzbar0 poppler-utils
 | 
					          sudo apt-get install -qq --no-install-recommends unpaper tesseract-ocr imagemagick ghostscript libzbar0 poppler-utils
 | 
				
			||||||
      -
 | 
					      -
 | 
				
			||||||
        name: Install Python dependencies
 | 
					        name: Install Python dependencies
 | 
				
			||||||
        run: |
 | 
					        run: |
 | 
				
			||||||
 | 
				
			|||||||
@ -77,7 +77,6 @@ ARG RUNTIME_PACKAGES="\
 | 
				
			|||||||
  libraqm0 \
 | 
					  libraqm0 \
 | 
				
			||||||
  libgnutls30 \
 | 
					  libgnutls30 \
 | 
				
			||||||
  libjpeg62-turbo \
 | 
					  libjpeg62-turbo \
 | 
				
			||||||
  optipng \
 | 
					 | 
				
			||||||
  python3 \
 | 
					  python3 \
 | 
				
			||||||
  python3-pip \
 | 
					  python3-pip \
 | 
				
			||||||
  python3-setuptools \
 | 
					  python3-setuptools \
 | 
				
			||||||
 | 
				
			|||||||
@ -712,13 +712,6 @@ PAPERLESS_CONVERT_TMPDIR=<path>
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    Default is none, which disables the temporary directory.
 | 
					    Default is none, which disables the temporary directory.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PAPERLESS_OPTIMIZE_THUMBNAILS=<bool>
 | 
					 | 
				
			||||||
    Use optipng to optimize thumbnails. This usually reduces the size of
 | 
					 | 
				
			||||||
    thumbnails by about 20%, but uses considerable compute time during
 | 
					 | 
				
			||||||
    consumption.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Defaults to true.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
PAPERLESS_POST_CONSUME_SCRIPT=<filename>
 | 
					PAPERLESS_POST_CONSUME_SCRIPT=<filename>
 | 
				
			||||||
    After a document is consumed, Paperless can trigger an arbitrary script if
 | 
					    After a document is consumed, Paperless can trigger an arbitrary script if
 | 
				
			||||||
    you like.  This script will be passed a number of arguments for you to work
 | 
					    you like.  This script will be passed a number of arguments for you to work
 | 
				
			||||||
@ -789,9 +782,6 @@ PAPERLESS_CONVERT_BINARY=<path>
 | 
				
			|||||||
PAPERLESS_GS_BINARY=<path>
 | 
					PAPERLESS_GS_BINARY=<path>
 | 
				
			||||||
    Defaults to "/usr/bin/gs".
 | 
					    Defaults to "/usr/bin/gs".
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PAPERLESS_OPTIPNG_BINARY=<path>
 | 
					 | 
				
			||||||
    Defaults to "/usr/bin/optipng".
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
.. _configuration-docker:
 | 
					.. _configuration-docker:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -286,7 +286,6 @@ writing. Windows is not and will never be supported.
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    *   ``fonts-liberation`` for generating thumbnails for plain text files
 | 
					    *   ``fonts-liberation`` for generating thumbnails for plain text files
 | 
				
			||||||
    *   ``imagemagick`` >= 6 for PDF conversion
 | 
					    *   ``imagemagick`` >= 6 for PDF conversion
 | 
				
			||||||
    *   ``optipng`` for optimizing thumbnails
 | 
					 | 
				
			||||||
    *   ``gnupg`` for handling encrypted documents
 | 
					    *   ``gnupg`` for handling encrypted documents
 | 
				
			||||||
    *   ``libpq-dev`` for PostgreSQL
 | 
					    *   ``libpq-dev`` for PostgreSQL
 | 
				
			||||||
    *   ``libmagic-dev`` for mime type detection
 | 
					    *   ``libmagic-dev`` for mime type detection
 | 
				
			||||||
@ -298,7 +297,7 @@ writing. Windows is not and will never be supported.
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    .. code::
 | 
					    .. code::
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        python3 python3-pip python3-dev imagemagick fonts-liberation optipng gnupg libpq-dev libmagic-dev mime-support libzbar0 poppler-utils
 | 
					        python3 python3-pip python3-dev imagemagick fonts-liberation gnupg libpq-dev libmagic-dev mime-support libzbar0 poppler-utils
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    These dependencies are required for OCRmyPDF, which is used for text recognition.
 | 
					    These dependencies are required for OCRmyPDF, which is used for text recognition.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -730,8 +729,6 @@ configuring some options in paperless can help improve performance immensely:
 | 
				
			|||||||
*   If you want to perform OCR on the device, consider using ``PAPERLESS_OCR_CLEAN=none``.
 | 
					*   If you want to perform OCR on the device, consider using ``PAPERLESS_OCR_CLEAN=none``.
 | 
				
			||||||
    This will speed up OCR times and use less memory at the expense of slightly worse
 | 
					    This will speed up OCR times and use less memory at the expense of slightly worse
 | 
				
			||||||
    OCR results.
 | 
					    OCR results.
 | 
				
			||||||
*   Set ``PAPERLESS_OPTIMIZE_THUMBNAILS`` to 'false' if you want faster consumption
 | 
					 | 
				
			||||||
    times. Thumbnails will be about 20% larger.
 | 
					 | 
				
			||||||
*   If using docker, consider setting ``PAPERLESS_WEBSERVER_WORKERS`` to
 | 
					*   If using docker, consider setting ``PAPERLESS_WEBSERVER_WORKERS`` to
 | 
				
			||||||
    1. This will save some memory.
 | 
					    1. This will save some memory.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -65,7 +65,6 @@
 | 
				
			|||||||
#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
 | 
					#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
 | 
				
			||||||
#PAPERLESS_CONSUMER_ENABLE_BARCODES=false
 | 
					#PAPERLESS_CONSUMER_ENABLE_BARCODES=false
 | 
				
			||||||
#PAPERLESS_CONSUMER_ENABLE_BARCODES=PATCHT
 | 
					#PAPERLESS_CONSUMER_ENABLE_BARCODES=PATCHT
 | 
				
			||||||
#PAPERLESS_OPTIMIZE_THUMBNAILS=true
 | 
					 | 
				
			||||||
#PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 | 
					#PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 | 
				
			||||||
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 | 
					#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 | 
				
			||||||
#PAPERLESS_FILENAME_DATE_ORDER=YMD
 | 
					#PAPERLESS_FILENAME_DATE_ORDER=YMD
 | 
				
			||||||
@ -84,4 +83,3 @@
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
 | 
					#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
 | 
				
			||||||
#PAPERLESS_GS_BINARY=/usr/bin/gs
 | 
					#PAPERLESS_GS_BINARY=/usr/bin/gs
 | 
				
			||||||
#PAPERLESS_OPTIPNG_BINARY=/usr/bin/optipng
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -273,7 +273,7 @@ class Consumer(LoggingMixin):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            self.log("debug", f"Generating thumbnail for {self.filename}...")
 | 
					            self.log("debug", f"Generating thumbnail for {self.filename}...")
 | 
				
			||||||
            self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
 | 
					            self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
 | 
				
			||||||
            thumbnail = document_parser.get_optimised_thumbnail(
 | 
					            thumbnail = document_parser.get_thumbnail(
 | 
				
			||||||
                self.path,
 | 
					                self.path,
 | 
				
			||||||
                mime_type,
 | 
					                mime_type,
 | 
				
			||||||
                self.filename,
 | 
					                self.filename,
 | 
				
			||||||
 | 
				
			|||||||
@ -1,4 +1,5 @@
 | 
				
			|||||||
import logging
 | 
					import logging
 | 
				
			||||||
 | 
					import multiprocessing.pool
 | 
				
			||||||
import shutil
 | 
					import shutil
 | 
				
			||||||
import tempfile
 | 
					import tempfile
 | 
				
			||||||
import time
 | 
					import time
 | 
				
			||||||
@ -8,10 +9,44 @@ from django.core.management.base import BaseCommand
 | 
				
			|||||||
from documents.models import Document
 | 
					from documents.models import Document
 | 
				
			||||||
from documents.parsers import run_convert
 | 
					from documents.parsers import run_convert
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
logger = logging.getLogger("paperless.management.convert_thumbnails")
 | 
					logger = logging.getLogger("paperless.management.convert_thumbnails")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def _do_convert(work_package):
 | 
				
			||||||
 | 
					    _, existing_thumbnail, converted_thumbnail = work_package
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        logger.info(f"Converting thumbnail: {existing_thumbnail}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Run actual conversion
 | 
				
			||||||
 | 
					        run_convert(
 | 
				
			||||||
 | 
					            density=300,
 | 
				
			||||||
 | 
					            scale="500x5000>",
 | 
				
			||||||
 | 
					            alpha="remove",
 | 
				
			||||||
 | 
					            strip=True,
 | 
				
			||||||
 | 
					            trim=False,
 | 
				
			||||||
 | 
					            auto_orient=True,
 | 
				
			||||||
 | 
					            input_file=f"{existing_thumbnail}[0]",
 | 
				
			||||||
 | 
					            output_file=str(converted_thumbnail),
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Copy newly created thumbnail to thumbnail directory
 | 
				
			||||||
 | 
					        shutil.copy(converted_thumbnail, existing_thumbnail.parent)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Remove the PNG version
 | 
				
			||||||
 | 
					        existing_thumbnail.unlink()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        logger.info(
 | 
				
			||||||
 | 
					            "Conversion to WebP completed, "
 | 
				
			||||||
 | 
					            f"replaced {existing_thumbnail.name} with {converted_thumbnail.name}",
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    except Exception as e:
 | 
				
			||||||
 | 
					        logger.error(
 | 
				
			||||||
 | 
					            f"Error converting thumbnail" f" (existing file unchanged): {e}",
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Command(BaseCommand):
 | 
					class Command(BaseCommand):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    help = """
 | 
					    help = """
 | 
				
			||||||
@ -24,21 +59,19 @@ class Command(BaseCommand):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def handle(self, *args, **options):
 | 
					    def handle(self, *args, **options):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.stdout.write("Converting all PNG thumbnails to WebP")
 | 
					        logger.info("Converting all PNG thumbnails to WebP")
 | 
				
			||||||
 | 
					 | 
				
			||||||
        start = time.time()
 | 
					        start = time.time()
 | 
				
			||||||
 | 
					 | 
				
			||||||
        documents = Document.objects.all()
 | 
					        documents = Document.objects.all()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with tempfile.TemporaryDirectory() as tempdir:
 | 
					        with tempfile.TemporaryDirectory() as tempdir:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            work_packages = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            for document in documents:
 | 
					            for document in documents:
 | 
				
			||||||
                existing_thumbnail = Path(document.thumbnail_path).resolve()
 | 
					                existing_thumbnail = Path(document.thumbnail_path).resolve()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                if existing_thumbnail.suffix == ".png":
 | 
					                if existing_thumbnail.suffix == ".png":
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    self.stdout.write(f"Converting thumbnail: {existing_thumbnail}")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    # Change the existing filename suffix from png to webp
 | 
					                    # Change the existing filename suffix from png to webp
 | 
				
			||||||
                    converted_thumbnail_name = existing_thumbnail.with_suffix(
 | 
					                    converted_thumbnail_name = existing_thumbnail.with_suffix(
 | 
				
			||||||
                        ".webp",
 | 
					                        ".webp",
 | 
				
			||||||
@ -49,46 +82,16 @@ class Command(BaseCommand):
 | 
				
			|||||||
                        Path(tempdir) / Path(converted_thumbnail_name)
 | 
					                        Path(tempdir) / Path(converted_thumbnail_name)
 | 
				
			||||||
                    ).resolve()
 | 
					                    ).resolve()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    try:
 | 
					                    # Package up the necessary info
 | 
				
			||||||
                        # Run actual conversion
 | 
					                    work_packages.append(
 | 
				
			||||||
                        run_convert(
 | 
					                        (document, existing_thumbnail, converted_thumbnail),
 | 
				
			||||||
                            density=300,
 | 
					 | 
				
			||||||
                            scale="500x5000>",
 | 
					 | 
				
			||||||
                            alpha="remove",
 | 
					 | 
				
			||||||
                            strip=True,
 | 
					 | 
				
			||||||
                            trim=False,
 | 
					 | 
				
			||||||
                            auto_orient=True,
 | 
					 | 
				
			||||||
                            input_file=f"{existing_thumbnail}[0]",
 | 
					 | 
				
			||||||
                            output_file=str(converted_thumbnail),
 | 
					 | 
				
			||||||
                    )
 | 
					                    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                        if converted_thumbnail.exists():
 | 
					            if len(work_packages):
 | 
				
			||||||
                            # Copy newly created thumbnail to thumbnail directory
 | 
					                with multiprocessing.pool.Pool(processes=4, maxtasksperchild=4) as pool:
 | 
				
			||||||
                            shutil.copy(converted_thumbnail, existing_thumbnail.parent)
 | 
					                    pool.map(_do_convert, work_packages)
 | 
				
			||||||
 | 
					 | 
				
			||||||
                            # Remove the PNG version
 | 
					 | 
				
			||||||
                            existing_thumbnail.unlink()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                            self.stdout.write(
 | 
					 | 
				
			||||||
                                self.style.SUCCESS(
 | 
					 | 
				
			||||||
                                    "Conversion to WebP completed",
 | 
					 | 
				
			||||||
                                ),
 | 
					 | 
				
			||||||
                            )
 | 
					 | 
				
			||||||
                        else:
 | 
					 | 
				
			||||||
                            # Highly unlike to reach here
 | 
					 | 
				
			||||||
                            self.stderr.write(
 | 
					 | 
				
			||||||
                                self.style.WARNING("Converted thumbnail doesn't exist"),
 | 
					 | 
				
			||||||
                            )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                    except Exception as e:
 | 
					 | 
				
			||||||
                        self.stderr.write(
 | 
					 | 
				
			||||||
                            self.style.ERROR(
 | 
					 | 
				
			||||||
                                f"Error converting thumbnail"
 | 
					 | 
				
			||||||
                                f" (existing file unchanged): {e}",
 | 
					 | 
				
			||||||
                            ),
 | 
					 | 
				
			||||||
                        )
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
            end = time.time()
 | 
					            end = time.time()
 | 
				
			||||||
            duration = end - start
 | 
					            duration = end - start
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.stdout.write(f"Conversion completed in {duration:.3f}s")
 | 
					        logger.info(f"Conversion completed in {duration:.3f}s")
 | 
				
			||||||
 | 
				
			|||||||
@ -41,7 +41,7 @@ def handle_document(document_id):
 | 
				
			|||||||
    try:
 | 
					    try:
 | 
				
			||||||
        parser.parse(document.source_path, mime_type, document.get_public_filename())
 | 
					        parser.parse(document.source_path, mime_type, document.get_public_filename())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        thumbnail = parser.get_optimised_thumbnail(
 | 
					        thumbnail = parser.get_thumbnail(
 | 
				
			||||||
            document.source_path,
 | 
					            document.source_path,
 | 
				
			||||||
            mime_type,
 | 
					            mime_type,
 | 
				
			||||||
            document.get_public_filename(),
 | 
					            document.get_public_filename(),
 | 
				
			||||||
 | 
				
			|||||||
@ -29,7 +29,7 @@ def _process_document(doc_in):
 | 
				
			|||||||
        if existing_thumbnail.exists() and existing_thumbnail.suffix == ".png":
 | 
					        if existing_thumbnail.exists() and existing_thumbnail.suffix == ".png":
 | 
				
			||||||
            existing_thumbnail.unlink()
 | 
					            existing_thumbnail.unlink()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        thumb = parser.get_optimised_thumbnail(
 | 
					        thumb = parser.get_thumbnail(
 | 
				
			||||||
            document.source_path,
 | 
					            document.source_path,
 | 
				
			||||||
            document.mime_type,
 | 
					            document.mime_type,
 | 
				
			||||||
            document.get_public_filename(),
 | 
					            document.get_public_filename(),
 | 
				
			||||||
 | 
				
			|||||||
@ -308,17 +308,11 @@ class Document(models.Model):
 | 
				
			|||||||
        png_file_path = os.path.join(settings.THUMBNAIL_DIR, png_file_name)
 | 
					        png_file_path = os.path.join(settings.THUMBNAIL_DIR, png_file_name)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # 1. Assume the thumbnail is WebP
 | 
					        # 1. Assume the thumbnail is WebP
 | 
				
			||||||
        if not os.path.exists(webp_file_path):
 | 
					        if os.path.exists(png_file_path):
 | 
				
			||||||
            # 2. If WebP doesn't exist, check PNG
 | 
					 | 
				
			||||||
            if not os.path.exists(png_file_path):
 | 
					 | 
				
			||||||
                # 3. If PNG doesn't exist, filename is being constructed, return WebP
 | 
					 | 
				
			||||||
                thumb = webp_file_path
 | 
					 | 
				
			||||||
            else:
 | 
					 | 
				
			||||||
                # 2.1 - PNG file exists, return path to it
 | 
					 | 
				
			||||||
            thumb = png_file_path
 | 
					            thumb = png_file_path
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            # 1.1 - WebP file exists, return path to it
 | 
					 | 
				
			||||||
            thumb = webp_file_path
 | 
					            thumb = webp_file_path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return os.path.normpath(thumb)
 | 
					        return os.path.normpath(thumb)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @property
 | 
					    @property
 | 
				
			||||||
 | 
				
			|||||||
@ -150,11 +150,14 @@ def run_convert(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_default_thumbnail() -> str:
 | 
					def get_default_thumbnail() -> str:
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
 | 
					    Returns the path to a generic thumbnail
 | 
				
			||||||
 | 
					    """
 | 
				
			||||||
    return os.path.join(os.path.dirname(__file__), "resources", "document.png")
 | 
					    return os.path.join(os.path.dirname(__file__), "resources", "document.png")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> str:
 | 
					def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None) -> str:
 | 
				
			||||||
    out_path = os.path.join(temp_dir, "convert_gs.png")
 | 
					    out_path = os.path.join(temp_dir, "convert_gs.webp")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # if convert fails, fall back to extracting
 | 
					    # if convert fails, fall back to extracting
 | 
				
			||||||
    # the first PDF page as a PNG using Ghostscript
 | 
					    # the first PDF page as a PNG using Ghostscript
 | 
				
			||||||
@ -319,29 +322,6 @@ class DocumentParser(LoggingMixin):
 | 
				
			|||||||
        """
 | 
					        """
 | 
				
			||||||
        raise NotImplementedError()
 | 
					        raise NotImplementedError()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
 | 
					 | 
				
			||||||
        thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
 | 
					 | 
				
			||||||
        if settings.OPTIMIZE_THUMBNAILS and os.path.splitext(thumbnail)[1] == ".png":
 | 
					 | 
				
			||||||
            out_path = os.path.join(self.tempdir, "thumb_optipng.png")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            args = (
 | 
					 | 
				
			||||||
                settings.OPTIPNG_BINARY,
 | 
					 | 
				
			||||||
                "-silent",
 | 
					 | 
				
			||||||
                "-o5",
 | 
					 | 
				
			||||||
                thumbnail,
 | 
					 | 
				
			||||||
                "-out",
 | 
					 | 
				
			||||||
                out_path,
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            self.log("debug", f"Execute: {' '.join(args)}")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            if not subprocess.Popen(args).wait() == 0:
 | 
					 | 
				
			||||||
                raise ParseError(f"Optipng failed at {args}")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            return out_path
 | 
					 | 
				
			||||||
        else:
 | 
					 | 
				
			||||||
            return thumbnail
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def get_text(self):
 | 
					    def get_text(self):
 | 
				
			||||||
        return self.text
 | 
					        return self.text
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -183,7 +183,7 @@ class DummyParser(DocumentParser):
 | 
				
			|||||||
        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
 | 
					        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
 | 
				
			||||||
        self.archive_path = archive_path
 | 
					        self.archive_path = archive_path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
 | 
					    def get_thumbnail(self, document_path, mime_type, file_name=None):
 | 
				
			||||||
        return self.fake_thumb
 | 
					        return self.fake_thumb
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def parse(self, document_path, mime_type, file_name=None):
 | 
					    def parse(self, document_path, mime_type, file_name=None):
 | 
				
			||||||
@ -194,7 +194,7 @@ class CopyParser(DocumentParser):
 | 
				
			|||||||
    def get_thumbnail(self, document_path, mime_type, file_name=None):
 | 
					    def get_thumbnail(self, document_path, mime_type, file_name=None):
 | 
				
			||||||
        return self.fake_thumb
 | 
					        return self.fake_thumb
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
 | 
					    def get_thumbnail(self, document_path, mime_type, file_name=None):
 | 
				
			||||||
        return self.fake_thumb
 | 
					        return self.fake_thumb
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, logging_group, progress_callback=None):
 | 
					    def __init__(self, logging_group, progress_callback=None):
 | 
				
			||||||
@ -216,7 +216,7 @@ class FaultyParser(DocumentParser):
 | 
				
			|||||||
        super().__init__(logging_group)
 | 
					        super().__init__(logging_group)
 | 
				
			||||||
        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
 | 
					        _, self.fake_thumb = tempfile.mkstemp(suffix=".png", dir=scratch_dir)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
 | 
					    def get_thumbnail(self, document_path, mime_type, file_name=None):
 | 
				
			||||||
        return self.fake_thumb
 | 
					        return self.fake_thumb
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def parse(self, document_path, mime_type, file_name=None):
 | 
					    def parse(self, document_path, mime_type, file_name=None):
 | 
				
			||||||
 | 
				
			|||||||
@ -137,32 +137,3 @@ class TestConvertThumbnails(TestCase):
 | 
				
			|||||||
                run_convert_mock.assert_called_once()
 | 
					                run_convert_mock.assert_called_once()
 | 
				
			||||||
                self.assertIn("Error converting thumbnail", stderr)
 | 
					                self.assertIn("Error converting thumbnail", stderr)
 | 
				
			||||||
                self.assertTrue(thumb_file.exists())
 | 
					                self.assertTrue(thumb_file.exists())
 | 
				
			||||||
 | 
					 | 
				
			||||||
    @mock.patch("documents.management.commands.convert_thumbnails.run_convert")
 | 
					 | 
				
			||||||
    def test_convert_single_thumbnail_no_output(self, run_convert_mock):
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        GIVEN:
 | 
					 | 
				
			||||||
            - Document exists with PNG thumbnail
 | 
					 | 
				
			||||||
        WHEN:
 | 
					 | 
				
			||||||
            - Thumbnail conversion is attempted, but there is no output WebP
 | 
					 | 
				
			||||||
        THEN:
 | 
					 | 
				
			||||||
            - Single thumbnail is converted
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        with tempfile.TemporaryDirectory() as thumbnail_dir:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
            with override_settings(
 | 
					 | 
				
			||||||
                THUMBNAIL_DIR=thumbnail_dir,
 | 
					 | 
				
			||||||
            ):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                thumb_file = self.create_png_thumbnail_file(thumbnail_dir)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                stdout, stderr = self.call_command()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                run_convert_mock.assert_called_once()
 | 
					 | 
				
			||||||
                self.assertIn(f"{thumb_file}", stdout)
 | 
					 | 
				
			||||||
                self.assertNotIn("Conversion to WebP completed", stdout)
 | 
					 | 
				
			||||||
                self.assertIn("Converted thumbnail doesn't exist", stderr)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
                self.assertTrue(thumb_file.exists())
 | 
					 | 
				
			||||||
                self.assertFalse(thumb_file.with_suffix(".webp").exists())
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -87,31 +87,6 @@ def fake_get_thumbnail(self, path, mimetype, file_name):
 | 
				
			|||||||
    return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
 | 
					    return os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TestBaseParser(TestCase):
 | 
					 | 
				
			||||||
    def setUp(self) -> None:
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        self.scratch = tempfile.mkdtemp()
 | 
					 | 
				
			||||||
        override_settings(SCRATCH_DIR=self.scratch).enable()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def tearDown(self) -> None:
 | 
					 | 
				
			||||||
        shutil.rmtree(self.scratch)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    @mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
 | 
					 | 
				
			||||||
    @override_settings(OPTIMIZE_THUMBNAILS=True)
 | 
					 | 
				
			||||||
    def test_get_optimised_thumbnail(self):
 | 
					 | 
				
			||||||
        parser = DocumentParser(None)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        parser.get_optimised_thumbnail("any", "not important", "document.pdf")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    @mock.patch("documents.parsers.DocumentParser.get_thumbnail", fake_get_thumbnail)
 | 
					 | 
				
			||||||
    @override_settings(OPTIMIZE_THUMBNAILS=False)
 | 
					 | 
				
			||||||
    def test_get_optimised_thumb_disabled(self):
 | 
					 | 
				
			||||||
        parser = DocumentParser(None)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        path = parser.get_optimised_thumbnail("any", "not important", "document.pdf")
 | 
					 | 
				
			||||||
        self.assertEqual(path, fake_get_thumbnail(None, None, None, None))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class TestParserAvailability(TestCase):
 | 
					class TestParserAvailability(TestCase):
 | 
				
			||||||
    def test_file_extensions(self):
 | 
					    def test_file_extensions(self):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -72,7 +72,7 @@ def binaries_check(app_configs, **kwargs):
 | 
				
			|||||||
    error = "Paperless can't find {}. Without it, consumption is impossible."
 | 
					    error = "Paperless can't find {}. Without it, consumption is impossible."
 | 
				
			||||||
    hint = "Either it's not in your ${PATH} or it's not installed."
 | 
					    hint = "Either it's not in your ${PATH} or it's not installed."
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    binaries = (settings.CONVERT_BINARY, settings.OPTIPNG_BINARY, "tesseract")
 | 
					    binaries = (settings.CONVERT_BINARY, "tesseract")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    check_messages = []
 | 
					    check_messages = []
 | 
				
			||||||
    for binary in binaries:
 | 
					    for binary in binaries:
 | 
				
			||||||
 | 
				
			|||||||
@ -526,8 +526,6 @@ CONSUMER_BARCODE_TIFF_SUPPORT = __get_boolean(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")
 | 
					CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
 | 
					OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# The default language that tesseract will attempt to use when parsing
 | 
					# The default language that tesseract will attempt to use when parsing
 | 
				
			||||||
@ -570,8 +568,6 @@ CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
 | 
					GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Pre-2.x versions of Paperless stored your documents locally with GPG
 | 
					# Pre-2.x versions of Paperless stored your documents locally with GPG
 | 
				
			||||||
# encryption, but that is no longer the default.  This behaviour is still
 | 
					# encryption, but that is no longer the default.  This behaviour is still
 | 
				
			||||||
 | 
				
			|||||||
@ -13,9 +13,9 @@ class TestChecks(DirectoriesMixin, TestCase):
 | 
				
			|||||||
    def test_binaries(self):
 | 
					    def test_binaries(self):
 | 
				
			||||||
        self.assertEqual(binaries_check(None), [])
 | 
					        self.assertEqual(binaries_check(None), [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @override_settings(CONVERT_BINARY="uuuhh", OPTIPNG_BINARY="forgot")
 | 
					    @override_settings(CONVERT_BINARY="uuuhh")
 | 
				
			||||||
    def test_binaries_fail(self):
 | 
					    def test_binaries_fail(self):
 | 
				
			||||||
        self.assertEqual(len(binaries_check(None)), 2)
 | 
					        self.assertEqual(len(binaries_check(None)), 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_paths_check(self):
 | 
					    def test_paths_check(self):
 | 
				
			||||||
        self.assertEqual(paths_check(None), [])
 | 
					        self.assertEqual(paths_check(None), [])
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user