mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-25 15:52:35 -04:00 
			
		
		
		
	Adds new setting to control color conversions (#4709)
This commit is contained in:
		
							parent
							
								
									e1b573adeb
								
							
						
					
					
						commit
						e3f4e0b775
					
				| @ -704,6 +704,20 @@ but could result in missing text content. | |||||||
|         this value if you are certain your documents are not malicious and |         this value if you are certain your documents are not malicious and | ||||||
|         you need the text which was not OCRed |         you need the text which was not OCRed | ||||||
| 
 | 
 | ||||||
|  | #### [`PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY=<RGB>`](#PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY) {#PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY} | ||||||
|  | 
 | ||||||
|  | : Controls the Ghostscript color conversion strategy used when creating the archive file. This setting | ||||||
|  | is only applied when the output type is a version of PDF/A. | ||||||
|  | 
 | ||||||
|  |     Valid options are CMYK, Gray, LeaveColorUnchanged, RGB or UseDeviceIndependentColor. | ||||||
|  | 
 | ||||||
|  |     See the [Ghostscript documentation](https://ghostscript.readthedocs.io/en/latest/VectorDevices.html#color-conversion-and-management) for details on each strategy. | ||||||
|  | 
 | ||||||
|  |     !!! warning | ||||||
|  | 
 | ||||||
|  |         Utilizing some of the options may result in errors when creating archive | ||||||
|  |         files from PDFs. | ||||||
|  | 
 | ||||||
| #### [`PAPERLESS_OCR_USER_ARGS=<json>`](#PAPERLESS_OCR_USER_ARGS) {#PAPERLESS_OCR_USER_ARGS} | #### [`PAPERLESS_OCR_USER_ARGS=<json>`](#PAPERLESS_OCR_USER_ARGS) {#PAPERLESS_OCR_USER_ARGS} | ||||||
| 
 | 
 | ||||||
| : OCRmyPDF offers many more options. Use this parameter to specify any | : OCRmyPDF offers many more options. Use this parameter to specify any | ||||||
|  | |||||||
| @ -864,6 +864,11 @@ OCR_MAX_IMAGE_PIXELS: Optional[int] = None | |||||||
| if os.environ.get("PAPERLESS_OCR_MAX_IMAGE_PIXELS") is not None: | if os.environ.get("PAPERLESS_OCR_MAX_IMAGE_PIXELS") is not None: | ||||||
|     OCR_MAX_IMAGE_PIXELS: int = int(os.environ.get("PAPERLESS_OCR_MAX_IMAGE_PIXELS")) |     OCR_MAX_IMAGE_PIXELS: int = int(os.environ.get("PAPERLESS_OCR_MAX_IMAGE_PIXELS")) | ||||||
| 
 | 
 | ||||||
|  | OCR_COLOR_CONVERSION_STRATEGY = os.getenv( | ||||||
|  |     "PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY", | ||||||
|  |     "RGB", | ||||||
|  | ) | ||||||
|  | 
 | ||||||
| OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}") | OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}") | ||||||
| 
 | 
 | ||||||
| # GNUPG needs a home directory for some reason | # GNUPG needs a home directory for some reason | ||||||
|  | |||||||
| @ -186,6 +186,11 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|             "progress_bar": False, |             "progress_bar": False, | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |         if "pdfa" in ocrmypdf_args["output_type"]: | ||||||
|  |             ocrmypdf_args[ | ||||||
|  |                 "color_conversion_strategy" | ||||||
|  |             ] = settings.OCR_COLOR_CONVERSION_STRATEGY | ||||||
|  | 
 | ||||||
|         if settings.OCR_MODE == "force" or safe_fallback: |         if settings.OCR_MODE == "force" or safe_fallback: | ||||||
|             ocrmypdf_args["force_ocr"] = True |             ocrmypdf_args["force_ocr"] = True | ||||||
|         elif settings.OCR_MODE in ["skip", "skip_noarchive"]: |         elif settings.OCR_MODE in ["skip", "skip_noarchive"]: | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user