mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-31 10:37:12 -04:00 
			
		
		
		
	Added a consume-start and consume-finish signal
This commit is contained in:
		
							parent
							
								
									8f9e34078b
								
							
						
					
					
						commit
						1170139127
					
				| @ -26,6 +26,8 @@ from paperless.db import GnuPG | ||||
| 
 | ||||
| from .models import Correspondent, Tag, Document, Log | ||||
| from .languages import ISO639 | ||||
| from .signals import ( | ||||
|     document_consumption_started, document_consumption_finished) | ||||
| 
 | ||||
| 
 | ||||
| class OCRError(Exception): | ||||
| @ -118,22 +120,33 @@ class Consumer(object): | ||||
| 
 | ||||
|             self.log("info", "Consuming {}".format(doc)) | ||||
| 
 | ||||
|             document_consumption_started.send( | ||||
|                 sender=self.__class__, filename=doc) | ||||
| 
 | ||||
|             tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH) | ||||
|             imgs = self._get_greyscale(tempdir, doc) | ||||
|             thumbnail = self._get_thumbnail(tempdir, doc) | ||||
| 
 | ||||
|             try: | ||||
| -                text = self._get_ocr(imgs) | ||||
| -                self._store(text, doc, thumbnail) | ||||
| 
 | ||||
|                 document = self._store(self._get_ocr(imgs), doc, thumbnail) | ||||
| 
 | ||||
|             except OCRError as e: | ||||
| 
 | ||||
|                 self._ignore.append(doc) | ||||
|                 self.log("error", "OCR FAILURE for {}: {}".format(doc, e)) | ||||
|                 self._cleanup_tempdir(tempdir) | ||||
| 
 | ||||
|                 continue | ||||
| 
 | ||||
|             else: | ||||
| 
 | ||||
|                 self._cleanup_tempdir(tempdir) | ||||
|                 self._cleanup_doc(doc) | ||||
| 
 | ||||
|                 document_consumption_finished.send( | ||||
|                     sender=self.__class__, document=document) | ||||
| 
 | ||||
|     def _get_greyscale(self, tempdir, doc): | ||||
|         """ | ||||
|         Greyscale images are easier for Tesseract to OCR | ||||
| @ -360,6 +373,8 @@ class Consumer(object): | ||||
| 
 | ||||
|         self.log("info", "Completed") | ||||
| 
 | ||||
|         return document | ||||
| 
 | ||||
|     def _cleanup_tempdir(self, d): | ||||
|         self.log("debug", "Deleting directory {}".format(d)) | ||||
|         shutil.rmtree(d) | ||||
|  | ||||
							
								
								
									
										4
									
								
								src/documents/signals.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								src/documents/signals.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,4 @@ | ||||
| from django.dispatch import Signal | ||||
| 
 | ||||
| document_consumption_started = Signal(providing_args=["filename"]) | ||||
| document_consumption_finished = Signal(providing_args=["document"]) | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user