mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-26 08:12:34 -04:00 
			
		
		
		
	Fix: handle page count exception for pw-protected files (#8240)
This commit is contained in:
		
							parent
							
								
									c22a80abd3
								
							
						
					
					
						commit
						a6f4c75a72
					
				| @ -43,10 +43,15 @@ class RasterisedDocumentParser(DocumentParser): | |||||||
|     def get_page_count(self, document_path, mime_type): |     def get_page_count(self, document_path, mime_type): | ||||||
|         page_count = None |         page_count = None | ||||||
|         if mime_type == "application/pdf": |         if mime_type == "application/pdf": | ||||||
|             import pikepdf |             try: | ||||||
|  |                 import pikepdf | ||||||
| 
 | 
 | ||||||
|             with pikepdf.Pdf.open(document_path) as pdf: |                 with pikepdf.Pdf.open(document_path) as pdf: | ||||||
|                 page_count = len(pdf.pages) |                     page_count = len(pdf.pages) | ||||||
|  |             except Exception as e: | ||||||
|  |                 self.log.warning( | ||||||
|  |                     f"Unable to determine PDF page count {document_path}: {e}", | ||||||
|  |                 ) | ||||||
|         return page_count |         return page_count | ||||||
| 
 | 
 | ||||||
|     def extract_metadata(self, document_path, mime_type): |     def extract_metadata(self, document_path, mime_type): | ||||||
|  | |||||||
| @ -81,6 +81,24 @@ class TestParser(DirectoriesMixin, FileSystemAssertsMixin, TestCase): | |||||||
|         ) |         ) | ||||||
|         self.assertEqual(page_count, 6) |         self.assertEqual(page_count, 6) | ||||||
| 
 | 
 | ||||||
|  |     def test_get_page_count_password_protected(self): | ||||||
|  |         """ | ||||||
|  |         GIVEN: | ||||||
|  |             - Password protected PDF file | ||||||
|  |         WHEN: | ||||||
|  |             - The number of pages is requested | ||||||
|  |         THEN: | ||||||
|  |             - The method returns None | ||||||
|  |         """ | ||||||
|  |         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||||
|  |         with self.assertLogs("paperless.parsing.tesseract", level="WARNING") as cm: | ||||||
|  |             page_count = parser.get_page_count( | ||||||
|  |                 os.path.join(self.SAMPLE_FILES, "password-protected.pdf"), | ||||||
|  |                 "application/pdf", | ||||||
|  |             ) | ||||||
|  |             self.assertEqual(page_count, None) | ||||||
|  |             self.assertIn("Unable to determine PDF page count", cm.output[0]) | ||||||
|  | 
 | ||||||
|     def test_thumbnail(self): |     def test_thumbnail(self): | ||||||
|         parser = RasterisedDocumentParser(uuid.uuid4()) |         parser = RasterisedDocumentParser(uuid.uuid4()) | ||||||
|         thumb = parser.get_thumbnail( |         thumb = parser.get_thumbnail( | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user