mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 19:17:13 -05:00 
			
		
		
		
	Detect and reset invalid ASNs to 0 during indexing with a loud error to the user
This commit is contained in:
		
							parent
							
								
									a203b006e7
								
							
						
					
					
						commit
						0f536a9b9a
					
				@ -146,11 +146,16 @@ class Consumer(LoggingMixin):
 | 
				
			|||||||
            return
 | 
					            return
 | 
				
			||||||
        # Validate the range is above zero and less than uint32_t max
 | 
					        # Validate the range is above zero and less than uint32_t max
 | 
				
			||||||
        # otherwise, Whoosh can't handle it in the index
 | 
					        # otherwise, Whoosh can't handle it in the index
 | 
				
			||||||
        if self.override_asn < 0 or self.override_asn > 0xFF_FF_FF_FF:
 | 
					        if (
 | 
				
			||||||
 | 
					            self.override_asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
 | 
				
			||||||
 | 
					            or self.override_asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
 | 
				
			||||||
 | 
					        ):
 | 
				
			||||||
            self._fail(
 | 
					            self._fail(
 | 
				
			||||||
                MESSAGE_ASN_RANGE,
 | 
					                MESSAGE_ASN_RANGE,
 | 
				
			||||||
                f"Not consuming {self.filename}: "
 | 
					                f"Not consuming {self.filename}: "
 | 
				
			||||||
                f"Given ASN {self.override_asn} is out of range [0, 4,294,967,295]",
 | 
					                f"Given ASN {self.override_asn} is out of range "
 | 
				
			||||||
 | 
					                f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
 | 
				
			||||||
 | 
					                f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}]",
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
        if Document.objects.filter(archive_serial_number=self.override_asn).exists():
 | 
					        if Document.objects.filter(archive_serial_number=self.override_asn).exists():
 | 
				
			||||||
            self._fail(
 | 
					            self._fail(
 | 
				
			||||||
 | 
				
			|||||||
@ -90,10 +90,22 @@ def open_index_searcher():
 | 
				
			|||||||
        searcher.close()
 | 
					        searcher.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def update_document(writer, doc):
 | 
					def update_document(writer: AsyncWriter, doc: Document):
 | 
				
			||||||
    tags = ",".join([t.name for t in doc.tags.all()])
 | 
					    tags = ",".join([t.name for t in doc.tags.all()])
 | 
				
			||||||
    tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
 | 
					    tags_ids = ",".join([str(t.id) for t in doc.tags.all()])
 | 
				
			||||||
    comments = ",".join([str(c.comment) for c in Comment.objects.filter(document=doc)])
 | 
					    comments = ",".join([str(c.comment) for c in Comment.objects.filter(document=doc)])
 | 
				
			||||||
 | 
					    asn = doc.archive_serial_number
 | 
				
			||||||
 | 
					    if asn is not None and (
 | 
				
			||||||
 | 
					        asn < Document.ARCHIVE_SERIAL_NUMBER_MIN
 | 
				
			||||||
 | 
					        or asn > Document.ARCHIVE_SERIAL_NUMBER_MAX
 | 
				
			||||||
 | 
					    ):
 | 
				
			||||||
 | 
					        logger.error(
 | 
				
			||||||
 | 
					            f"Not indexing Archive Serial Number {asn} of document {doc.pk}. "
 | 
				
			||||||
 | 
					            f"ASN is out of range "
 | 
				
			||||||
 | 
					            f"[{Document.ARCHIVE_SERIAL_NUMBER_MIN:,}, "
 | 
				
			||||||
 | 
					            f"{Document.ARCHIVE_SERIAL_NUMBER_MAX:,}].",
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        asn = 0
 | 
				
			||||||
    writer.update_document(
 | 
					    writer.update_document(
 | 
				
			||||||
        id=doc.pk,
 | 
					        id=doc.pk,
 | 
				
			||||||
        title=doc.title,
 | 
					        title=doc.title,
 | 
				
			||||||
@ -109,7 +121,7 @@ def update_document(writer, doc):
 | 
				
			|||||||
        has_type=doc.document_type is not None,
 | 
					        has_type=doc.document_type is not None,
 | 
				
			||||||
        created=doc.created,
 | 
					        created=doc.created,
 | 
				
			||||||
        added=doc.added,
 | 
					        added=doc.added,
 | 
				
			||||||
        asn=doc.archive_serial_number,
 | 
					        asn=asn,
 | 
				
			||||||
        modified=doc.modified,
 | 
					        modified=doc.modified,
 | 
				
			||||||
        path=doc.storage_path.name if doc.storage_path else None,
 | 
					        path=doc.storage_path.name if doc.storage_path else None,
 | 
				
			||||||
        path_id=doc.storage_path.id if doc.storage_path else None,
 | 
					        path_id=doc.storage_path.id if doc.storage_path else None,
 | 
				
			||||||
 | 
				
			|||||||
@ -3,6 +3,7 @@ import logging
 | 
				
			|||||||
import os
 | 
					import os
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
from collections import OrderedDict
 | 
					from collections import OrderedDict
 | 
				
			||||||
 | 
					from typing import Final
 | 
				
			||||||
from typing import Optional
 | 
					from typing import Optional
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import dateutil.parser
 | 
					import dateutil.parser
 | 
				
			||||||
@ -229,6 +230,9 @@ class Document(models.Model):
 | 
				
			|||||||
        help_text=_("The original name of the file when it was uploaded"),
 | 
					        help_text=_("The original name of the file when it was uploaded"),
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    ARCHIVE_SERIAL_NUMBER_MIN: Final[int] = 0
 | 
				
			||||||
 | 
					    ARCHIVE_SERIAL_NUMBER_MAX: Final[int] = 0xFF_FF_FF_FF
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    archive_serial_number = models.PositiveIntegerField(
 | 
					    archive_serial_number = models.PositiveIntegerField(
 | 
				
			||||||
        _("archive serial number"),
 | 
					        _("archive serial number"),
 | 
				
			||||||
        blank=True,
 | 
					        blank=True,
 | 
				
			||||||
@ -236,8 +240,8 @@ class Document(models.Model):
 | 
				
			|||||||
        unique=True,
 | 
					        unique=True,
 | 
				
			||||||
        db_index=True,
 | 
					        db_index=True,
 | 
				
			||||||
        validators=[
 | 
					        validators=[
 | 
				
			||||||
            MaxValueValidator(0xFF_FF_FF_FF),
 | 
					            MaxValueValidator(ARCHIVE_SERIAL_NUMBER_MAX),
 | 
				
			||||||
            MinValueValidator(0),
 | 
					            MinValueValidator(ARCHIVE_SERIAL_NUMBER_MIN),
 | 
				
			||||||
        ],
 | 
					        ],
 | 
				
			||||||
        help_text=_(
 | 
					        help_text=_(
 | 
				
			||||||
            "The position of this document in your physical document " "archive.",
 | 
					            "The position of this document in your physical document " "archive.",
 | 
				
			||||||
 | 
				
			|||||||
@ -1,3 +1,5 @@
 | 
				
			|||||||
 | 
					from unittest import mock
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from django.test import TestCase
 | 
					from django.test import TestCase
 | 
				
			||||||
from documents import index
 | 
					from documents import index
 | 
				
			||||||
from documents.models import Document
 | 
					from documents.models import Document
 | 
				
			||||||
@ -31,3 +33,60 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
 | 
				
			|||||||
        )
 | 
					        )
 | 
				
			||||||
        self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
 | 
					        self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
 | 
				
			||||||
        self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
 | 
					        self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_archive_serial_number_ranging(self):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        GIVEN:
 | 
				
			||||||
 | 
					            - Document with an archive serial number above schema allowed size
 | 
				
			||||||
 | 
					        WHEN:
 | 
				
			||||||
 | 
					            - Document is provided to the index
 | 
				
			||||||
 | 
					        THEN:
 | 
				
			||||||
 | 
					            - Error is logged
 | 
				
			||||||
 | 
					            - Document ASN is reset to 0 for the index
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        doc1 = Document.objects.create(
 | 
				
			||||||
 | 
					            title="doc1",
 | 
				
			||||||
 | 
					            checksum="A",
 | 
				
			||||||
 | 
					            content="test test2 test3",
 | 
				
			||||||
 | 
					            # yes, this is allowed, unless full_clean is run
 | 
				
			||||||
 | 
					            # DRF does call the validators, this test won't
 | 
				
			||||||
 | 
					            archive_serial_number=Document.ARCHIVE_SERIAL_NUMBER_MAX + 1,
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        with self.assertLogs("paperless.index", level="ERROR") as cm:
 | 
				
			||||||
 | 
					            with mock.patch(
 | 
				
			||||||
 | 
					                "documents.index.AsyncWriter.update_document",
 | 
				
			||||||
 | 
					            ) as mocked_update_doc:
 | 
				
			||||||
 | 
					                index.add_or_update_document(doc1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                mocked_update_doc.assert_called_once()
 | 
				
			||||||
 | 
					                _, kwargs = mocked_update_doc.call_args
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                self.assertEqual(kwargs["asn"], 0)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                error_str = cm.output[0]
 | 
				
			||||||
 | 
					                expected_str = "ERROR:paperless.index:Not indexing Archive Serial Number 4294967296 of document 1"
 | 
				
			||||||
 | 
					                self.assertIn(expected_str, error_str)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_archive_serial_number_is_none(self):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        GIVEN:
 | 
				
			||||||
 | 
					            - Document with no archive serial number
 | 
				
			||||||
 | 
					        WHEN:
 | 
				
			||||||
 | 
					            - Document is provided to the index
 | 
				
			||||||
 | 
					        THEN:
 | 
				
			||||||
 | 
					            - ASN isn't touched
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        doc1 = Document.objects.create(
 | 
				
			||||||
 | 
					            title="doc1",
 | 
				
			||||||
 | 
					            checksum="A",
 | 
				
			||||||
 | 
					            content="test test2 test3",
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        with mock.patch(
 | 
				
			||||||
 | 
					            "documents.index.AsyncWriter.update_document",
 | 
				
			||||||
 | 
					        ) as mocked_update_doc:
 | 
				
			||||||
 | 
					            index.add_or_update_document(doc1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            mocked_update_doc.assert_called_once()
 | 
				
			||||||
 | 
					            _, kwargs = mocked_update_doc.call_args
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            self.assertIsNone(kwargs["asn"])
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user