mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 19:17:13 -05:00 
			
		
		
		
	Merge pull request #1442 from paperless-ngx/fix/skip-archive-still-archiving
Bugfix: Fixes an archive file still being created even when the skip_noarchive OCR mode was specified
This commit is contained in:
		
						commit
						1692bac3fe
					
				@ -276,7 +276,10 @@ class RasterisedDocumentParser(DocumentParser):
 | 
			
		||||
            self.log("debug", f"Calling OCRmyPDF with args: {args}")
 | 
			
		||||
            ocrmypdf.ocr(**args)
 | 
			
		||||
 | 
			
		||||
            self.archive_path = archive_path
 | 
			
		||||
            # Only create archive file if archiving isn't being skipped
 | 
			
		||||
            if settings.OCR_MODE != "skip_noarchive":
 | 
			
		||||
                self.archive_path = archive_path
 | 
			
		||||
 | 
			
		||||
            self.text = self.extract_text(sidecar_file, archive_path)
 | 
			
		||||
 | 
			
		||||
            if not self.text:
 | 
			
		||||
 | 
			
		||||
@ -364,6 +364,16 @@ class TestParser(DirectoriesMixin, TestCase):
 | 
			
		||||
 | 
			
		||||
    @override_settings(OCR_MODE="skip_noarchive")
 | 
			
		||||
    def test_skip_noarchive_withtext(self):
 | 
			
		||||
        """
 | 
			
		||||
        GIVEN:
 | 
			
		||||
            - File with existing text layer
 | 
			
		||||
            - OCR mode set to skip_noarchive
 | 
			
		||||
        WHEN:
 | 
			
		||||
            - Document is parsed
 | 
			
		||||
        THEN:
 | 
			
		||||
            - Text from images is extracted
 | 
			
		||||
            - No archive file is created
 | 
			
		||||
        """
 | 
			
		||||
        parser = RasterisedDocumentParser(None)
 | 
			
		||||
        parser.parse(
 | 
			
		||||
            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
 | 
			
		||||
@ -377,17 +387,29 @@ class TestParser(DirectoriesMixin, TestCase):
 | 
			
		||||
 | 
			
		||||
    @override_settings(OCR_MODE="skip_noarchive")
 | 
			
		||||
    def test_skip_noarchive_notext(self):
 | 
			
		||||
        """
 | 
			
		||||
        GIVEN:
 | 
			
		||||
            - File with text contained in images but no text layer
 | 
			
		||||
            - OCR mode set to skip_noarchive
 | 
			
		||||
        WHEN:
 | 
			
		||||
            - Document is parsed
 | 
			
		||||
        THEN:
 | 
			
		||||
            - Text from images is extracted
 | 
			
		||||
            - No archive file is created
 | 
			
		||||
        """
 | 
			
		||||
        parser = RasterisedDocumentParser(None)
 | 
			
		||||
        parser.parse(
 | 
			
		||||
            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
 | 
			
		||||
            "application/pdf",
 | 
			
		||||
        )
 | 
			
		||||
        self.assertTrue(os.path.isfile(parser.archive_path))
 | 
			
		||||
 | 
			
		||||
        self.assertContainsStrings(
 | 
			
		||||
            parser.get_text().lower(),
 | 
			
		||||
            ["page 1", "page 2", "page 3"],
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        self.assertIsNone(parser.archive_path)
 | 
			
		||||
 | 
			
		||||
    @override_settings(OCR_MODE="skip")
 | 
			
		||||
    def test_multi_page_mixed(self):
 | 
			
		||||
        parser = RasterisedDocumentParser(None)
 | 
			
		||||
@ -408,6 +430,16 @@ class TestParser(DirectoriesMixin, TestCase):
 | 
			
		||||
 | 
			
		||||
    @override_settings(OCR_MODE="skip_noarchive")
 | 
			
		||||
    def test_multi_page_mixed_no_archive(self):
 | 
			
		||||
        """
 | 
			
		||||
        GIVEN:
 | 
			
		||||
            - File with some text contained in images and some in text layer
 | 
			
		||||
            - OCR mode set to skip_noarchive
 | 
			
		||||
        WHEN:
 | 
			
		||||
            - Document is parsed
 | 
			
		||||
        THEN:
 | 
			
		||||
            - Text from images is extracted
 | 
			
		||||
            - No archive file is created
 | 
			
		||||
        """
 | 
			
		||||
        parser = RasterisedDocumentParser(None)
 | 
			
		||||
        parser.parse(
 | 
			
		||||
            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user