mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-31 02:27:10 -04:00 
			
		
		
		
	Added a consume-start and consume-finish signal
This commit is contained in:
		
							parent
							
								
									8f9e34078b
								
							
						
					
					
						commit
						1170139127
					
				| @ -26,6 +26,8 @@ from paperless.db import GnuPG | |||||||
| 
 | 
 | ||||||
| from .models import Correspondent, Tag, Document, Log | from .models import Correspondent, Tag, Document, Log | ||||||
| from .languages import ISO639 | from .languages import ISO639 | ||||||
|  | from .signals import ( | ||||||
|  |     document_consumption_started, document_consumption_finished) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class OCRError(Exception): | class OCRError(Exception): | ||||||
| @ -118,22 +120,33 @@ class Consumer(object): | |||||||
| 
 | 
 | ||||||
|             self.log("info", "Consuming {}".format(doc)) |             self.log("info", "Consuming {}".format(doc)) | ||||||
| 
 | 
 | ||||||
|  |             document_consumption_started.send( | ||||||
|  |                 sender=self.__class__, filename=doc) | ||||||
|  | 
 | ||||||
|             tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH) |             tempdir = tempfile.mkdtemp(prefix="paperless", dir=self.SCRATCH) | ||||||
|             imgs = self._get_greyscale(tempdir, doc) |             imgs = self._get_greyscale(tempdir, doc) | ||||||
|             thumbnail = self._get_thumbnail(tempdir, doc) |             thumbnail = self._get_thumbnail(tempdir, doc) | ||||||
| 
 | 
 | ||||||
|             try: |             try: | ||||||
|                 text = self._get_ocr(imgs) | 
 | ||||||
|                 self._store(text, doc, thumbnail) |                 document = self._store(self._get_ocr(imgs), doc, thumbnail) | ||||||
|  | 
 | ||||||
|             except OCRError as e: |             except OCRError as e: | ||||||
|  | 
 | ||||||
|                 self._ignore.append(doc) |                 self._ignore.append(doc) | ||||||
|                 self.log("error", "OCR FAILURE for {}: {}".format(doc, e)) |                 self.log("error", "OCR FAILURE for {}: {}".format(doc, e)) | ||||||
|                 self._cleanup_tempdir(tempdir) |                 self._cleanup_tempdir(tempdir) | ||||||
|  | 
 | ||||||
|                 continue |                 continue | ||||||
|  | 
 | ||||||
|             else: |             else: | ||||||
|  | 
 | ||||||
|                 self._cleanup_tempdir(tempdir) |                 self._cleanup_tempdir(tempdir) | ||||||
|                 self._cleanup_doc(doc) |                 self._cleanup_doc(doc) | ||||||
| 
 | 
 | ||||||
|  |                 document_consumption_finished.send( | ||||||
|  |                     sender=self.__class__, filename=document) | ||||||
|  | 
 | ||||||
|     def _get_greyscale(self, tempdir, doc): |     def _get_greyscale(self, tempdir, doc): | ||||||
|         """ |         """ | ||||||
|         Greyscale images are easier for Tesseract to OCR |         Greyscale images are easier for Tesseract to OCR | ||||||
| @ -360,6 +373,8 @@ class Consumer(object): | |||||||
| 
 | 
 | ||||||
|         self.log("info", "Completed") |         self.log("info", "Completed") | ||||||
| 
 | 
 | ||||||
|  |         return document | ||||||
|  | 
 | ||||||
|     def _cleanup_tempdir(self, d): |     def _cleanup_tempdir(self, d): | ||||||
|         self.log("debug", "Deleting directory {}".format(d)) |         self.log("debug", "Deleting directory {}".format(d)) | ||||||
|         shutil.rmtree(d) |         shutil.rmtree(d) | ||||||
|  | |||||||
							
								
								
									
										4
									
								
								src/documents/signals.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										4
									
								
								src/documents/signals.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,4 @@ | |||||||
|  | from django.dispatch import Signal | ||||||
|  | 
 | ||||||
|  | document_consumption_started = Signal(providing_args=["filename"]) | ||||||
|  | document_consumption_finished = Signal(providing_args=["document"]) | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user