mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 19:17:13 -05:00 
			
		
		
		
	a couple fixes and more supported image files
This commit is contained in:
		
							parent
							
								
									5e1543bad5
								
							
						
					
					
						commit
						e3ce573fbb
					
				@ -268,8 +268,9 @@ def update_filename_and_move_files(sender, instance, **kwargs):
 | 
			
		||||
        logging.getLogger(__name__).debug(
 | 
			
		||||
            f"Moved file {old_source_path} to {new_source_path}.")
 | 
			
		||||
 | 
			
		||||
        logging.getLogger(__name__).debug(
 | 
			
		||||
            f"Moved file {old_archive_path} to {new_archive_path}.")
 | 
			
		||||
        if instance.archive_checksum:
 | 
			
		||||
            logging.getLogger(__name__).debug(
 | 
			
		||||
                f"Moved file {old_archive_path} to {new_archive_path}.")
 | 
			
		||||
 | 
			
		||||
    except OSError as e:
 | 
			
		||||
        instance.filename = old_filename
 | 
			
		||||
 | 
			
		||||
@ -65,7 +65,10 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
    def is_image(self, mime_type):
        """Return whether ``mime_type`` is one of the raster image types listed.

        Args:
            mime_type: MIME type string, e.g. ``"image/png"``.

        Returns:
            bool: True for PNG, JPEG, TIFF, BMP and GIF; False for anything
            else.
        """
        # Keep one literal per line, each followed by a comma: without the
        # comma Python joins adjacent string literals into a single entry
        # (e.g. "image/jpegimage/jpeg") that never matches a real MIME type.
        return mime_type in [
            "image/png",
            "image/jpeg",
            "image/tiff",
            "image/bmp",
            "image/gif",
        ]
 | 
			
		||||
 | 
			
		||||
    def get_dpi(self, image):
 | 
			
		||||
 | 
			
		||||
@ -8,6 +8,9 @@ def tesseract_consumer_declaration(sender, **kwargs):
 | 
			
		||||
        "mime_types": {
 | 
			
		||||
            "application/pdf": ".pdf",
 | 
			
		||||
            "image/jpeg": ".jpg",
 | 
			
		||||
            "image/png": ".png"
 | 
			
		||||
            "image/png": ".png",
 | 
			
		||||
            "image/tiff": ".tif",
 | 
			
		||||
            "image/gif": ".gif",
 | 
			
		||||
            "image/bmp": ".bmp",
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										
											BIN
										
									
								
								src/paperless_tesseract/tests/samples/simple.bmp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								src/paperless_tesseract/tests/samples/simple.bmp
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 1.7 MiB  | 
							
								
								
									
										
											BIN
										
									
								
								src/paperless_tesseract/tests/samples/simple.gif
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								src/paperless_tesseract/tests/samples/simple.gif
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 18 KiB  | 
							
								
								
									
										
											BIN
										
									
								
								src/paperless_tesseract/tests/samples/simple.jpg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								src/paperless_tesseract/tests/samples/simple.jpg
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| 
		 After Width: | Height: | Size: 19 KiB  | 
							
								
								
									
										
											BIN
										
									
								
								src/paperless_tesseract/tests/samples/simple.tif
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								src/paperless_tesseract/tests/samples/simple.tif
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							@ -247,3 +247,33 @@ class TestParser(DirectoriesMixin, TestCase):
 | 
			
		||||
        parser.parse(os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf")
 | 
			
		||||
        self.assertTrue(os.path.join(parser.archive_path))
 | 
			
		||||
        self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2", "page 3"])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestParserFileTypes(DirectoriesMixin, TestCase):
    """Check that each supported raster image type can be parsed.

    Every test hands one sample file to ``RasterisedDocumentParser`` and
    verifies that an archive file exists afterwards and that the expected
    sentence appears in the extracted text.
    """

    SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")

    def _assert_sample_parses(self, file_name, mime_type):
        """Parse ``file_name`` from SAMPLE_FILES and check archive and text.

        Args:
            file_name: Name of the sample file inside ``SAMPLE_FILES``.
            mime_type: MIME type passed to the parser for that file.
        """
        parser = RasterisedDocumentParser(None)
        parser.parse(os.path.join(self.SAMPLE_FILES, file_name), mime_type)
        self.assertTrue(os.path.isfile(parser.archive_path))
        # assertIn reports the extracted text on failure, which a bare
        # assertTrue(... in ...) does not.
        self.assertIn("this is a test document", parser.get_text().lower())

    def test_bmp(self):
        self._assert_sample_parses("simple.bmp", "image/bmp")

    def test_jpg(self):
        self._assert_sample_parses("simple.jpg", "image/jpeg")

    # NOTE(review): presumably the GIF sample carries no usable DPI
    # information, hence the explicit OCR_IMAGE_DPI override - confirm.
    @override_settings(OCR_IMAGE_DPI=200)
    def test_gif(self):
        self._assert_sample_parses("simple.gif", "image/gif")

    def test_tiff(self):
        self._assert_sample_parses("simple.tif", "image/tiff")
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user