mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 19:17:13 -05:00 
			
		
		
		
	Added a text cache to optimize performance of date detection
This commit is contained in:
		
							parent
							
								
									bef2d94374
								
							
						
					
					
						commit
						40f8ba23a4
					
				@ -35,6 +35,7 @@ class RasterisedDocumentParser(DocumentParser):
 | 
				
			|||||||
    DATE_ORDER = settings.DATE_ORDER
 | 
					    DATE_ORDER = settings.DATE_ORDER
 | 
				
			||||||
    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
 | 
					    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
 | 
				
			||||||
    OCR_ALWAYS = settings.OCR_ALWAYS
 | 
					    OCR_ALWAYS = settings.OCR_ALWAYS
 | 
				
			||||||
 | 
					    TEXT_CACHE = None
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_thumbnail(self):
 | 
					    def get_thumbnail(self):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
@ -62,15 +63,20 @@ class RasterisedDocumentParser(DocumentParser):
 | 
				
			|||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def get_text(self):
 | 
					    def get_text(self):
 | 
				
			||||||
 | 
					        if self.TEXT_CACHE is not None:
 | 
				
			||||||
 | 
					            return self.TEXT_CACHE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if not self.OCR_ALWAYS and self._is_ocred():
 | 
					        if not self.OCR_ALWAYS and self._is_ocred():
 | 
				
			||||||
            self.log("info", "Skipping OCR, using Text from PDF")
 | 
					            self.log("info", "Skipping OCR, using Text from PDF")
 | 
				
			||||||
            return get_text_from_pdf(self.document_path)
 | 
					            self.TEXT_CACHE = get_text_from_pdf(self.document_path)
 | 
				
			||||||
 | 
					            return self.TEXT_CACHE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        images = self._get_greyscale()
 | 
					        images = self._get_greyscale()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            return self._get_ocr(images)
 | 
					            self.TEXT_CACHE = self._get_ocr(images)
 | 
				
			||||||
 | 
					            return self.TEXT_CACHE
 | 
				
			||||||
        except OCRError as e:
 | 
					        except OCRError as e:
 | 
				
			||||||
            raise ParseError(e)
 | 
					            raise ParseError(e)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user