mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-04 03:27:12 -05:00 
			
		
		
		
	Merge pull request #494 from JensPfeifle/fix_447
fix parse error of some documents by using gs
This commit is contained in:
		
						commit
						305d50d7ed
					
				@ -247,6 +247,9 @@ PAPERLESS_EMAIL_SECRET=""
 | 
			
		||||
# Convert (part of the ImageMagick suite)
 | 
			
		||||
#PAPERLESS_CONVERT_BINARY=/usr/bin/convert
 | 
			
		||||
 | 
			
		||||
# Ghostscript
 | 
			
		||||
#PAPERLESS_GS_BINARY=/usr/bin/gs
 | 
			
		||||
 | 
			
		||||
# Unpaper
 | 
			
		||||
#PAPERLESS_UNPAPER_BINARY=/usr/bin/unpaper
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -263,6 +263,9 @@ CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
 | 
			
		||||
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")
 | 
			
		||||
CONVERT_DENSITY = os.getenv("PAPERLESS_CONVERT_DENSITY")
 | 
			
		||||
 | 
			
		||||
# Ghostscript
 | 
			
		||||
GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")
 | 
			
		||||
 | 
			
		||||
# OptiPNG
 | 
			
		||||
OPTIPNG_BINARY = os.getenv("PAPERLESS_OPTIPNG_BINARY", "optipng")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -29,6 +29,7 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    CONVERT = settings.CONVERT_BINARY
 | 
			
		||||
    GHOSTSCRIPT = settings.GS_BINARY
 | 
			
		||||
    DENSITY = settings.CONVERT_DENSITY if settings.CONVERT_DENSITY else 300
 | 
			
		||||
    THREADS = int(settings.OCR_THREADS) if settings.OCR_THREADS else None
 | 
			
		||||
    UNPAPER = settings.UNPAPER_BINARY
 | 
			
		||||
@ -47,13 +48,38 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
        out_path = os.path.join(self.tempdir, "convert.png")
 | 
			
		||||
 | 
			
		||||
        # Run convert to get a decent thumbnail
 | 
			
		||||
        run_convert(
 | 
			
		||||
            self.CONVERT,
 | 
			
		||||
            "-scale", "500x5000",
 | 
			
		||||
            "-alpha", "remove",
 | 
			
		||||
            "{}[0]".format(self.document_path),
 | 
			
		||||
            out_path
 | 
			
		||||
        )
 | 
			
		||||
        try:
 | 
			
		||||
            run_convert(
 | 
			
		||||
                self.CONVERT,
 | 
			
		||||
                "-scale", "500x5000",
 | 
			
		||||
                "-alpha", "remove",
 | 
			
		||||
                "{}[0]".format(self.document_path),
 | 
			
		||||
                out_path
 | 
			
		||||
            )
 | 
			
		||||
        except ParseError:
 | 
			
		||||
            # if convert fails, fall back to extracting
 | 
			
		||||
            # the first PDF page as a PNG using Ghostscript
 | 
			
		||||
            self.log(
 | 
			
		||||
                "warning",
 | 
			
		||||
                "Thumbnail generation with ImageMagick failed, "
 | 
			
		||||
                "falling back to Ghostscript."
 | 
			
		||||
            )
 | 
			
		||||
            gs_out_path = os.path.join(self.tempdir, "gs_out.png")
 | 
			
		||||
            cmd = [self.GHOSTSCRIPT,
 | 
			
		||||
                   "-q",
 | 
			
		||||
                   "-sDEVICE=pngalpha",
 | 
			
		||||
                   "-o", gs_out_path,
 | 
			
		||||
                   self.document_path]
 | 
			
		||||
            if not subprocess.Popen(cmd).wait() == 0:
 | 
			
		||||
                raise ParseError("Thumbnail (gs) failed at {}".format(cmd))
 | 
			
		||||
            # then run convert on the output from gs
 | 
			
		||||
            run_convert(
 | 
			
		||||
                self.CONVERT,
 | 
			
		||||
                "-scale", "500x5000",
 | 
			
		||||
                "-alpha", "remove",
 | 
			
		||||
                gs_out_path,
 | 
			
		||||
                out_path
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        return out_path
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user