mirror of https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-11-04 03:27:12 -05:00

Merge pull request #278 from stumpylog/pre-commit-python-changes

Python Cleanup from pre-commit

commit 168ce2111d
@@ -62,6 +62,7 @@ repos:
         exclude: "(migrations)|(paperless/settings.py)|(.*\\.tox)|(.*/tests/.*)"
         args:
           - "--max-line-length=88"
+          - "--ignore=E203,W503"
   - repo: https://github.com/psf/black
     rev: 22.1.0
     hooks:
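The new "--ignore=E203,W503" argument is the usual flake8 setting for a codebase formatted by black (the black hook configured just below in the same file): black may leave a space before the colon in slices with complex bounds (E203) and wraps long expressions with the line break before the binary operator (W503), both of which are PEP 8-compatible. A minimal illustration, not taken from the repository:

    # black-style output; without the ignore list flake8 would flag these lines.
    def window_total(prices, start, offset):
        window = prices[start + offset : start + offset + 10]  # E203 candidate
        return (
            sum(window)
            + len(window) * 0.01  # W503 candidate
        )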
@@ -1,2 +1,5 @@
 # this is here so that django finds the checks.
-from .checks import *
+from .checks import changed_password_check
+from .checks import parser_check
+
+__all__ = ["changed_password_check", "parser_check"]
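Replacing the star import keeps the side effect this module exists for, importing the check functions at package load time so Django registers them, while being explicit about what is pulled in. Listing the two names in __all__ marks them as intentional re-exports, which is also what stops flake8 from reporting the now-explicit imports as unused (F401).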
@@ -1,13 +1,11 @@
 from django.contrib import admin

-from .models import (
-    Correspondent,
-    Document,
-    DocumentType,
-    Tag,
-    SavedView,
-    SavedViewFilterRule,
-)
+from .models import Correspondent
+from .models import Document
+from .models import DocumentType
+from .models import SavedView
+from .models import SavedViewFilterRule
+from .models import Tag


 class CorrespondentAdmin(admin.ModelAdmin):
@@ -1,5 +1,4 @@
 from django.apps import AppConfig
-
 from django.utils.translation import gettext_lazy as _


@@ -8,7 +8,10 @@ class BulkArchiveStrategy:
         self.zipf = zipf

     def make_unique_filename(
-        self, doc: Document, archive: bool = False, folder: str = ""
+        self,
+        doc: Document,
+        archive: bool = False,
+        folder: str = "",
     ):
         counter = 0
         while True:
@@ -34,7 +37,8 @@ class ArchiveOnlyStrategy(BulkArchiveStrategy):
     def add_document(self, doc: Document):
         if doc.has_archive_version:
             self.zipf.write(
-                doc.archive_path, self.make_unique_filename(doc, archive=True)
+                doc.archive_path,
+                self.make_unique_filename(doc, archive=True),
             )
         else:
             self.zipf.write(doc.source_path, self.make_unique_filename(doc))
@@ -49,5 +53,6 @@ class OriginalAndArchiveStrategy(BulkArchiveStrategy):
             )

         self.zipf.write(
-            doc.source_path, self.make_unique_filename(doc, folder="originals/")
+            doc.source_path,
+            self.make_unique_filename(doc, folder="originals/"),
         )
@@ -2,8 +2,9 @@ import itertools

 from django.db.models import Q
 from django_q.tasks import async_task
-from documents.models import Document, Correspondent, DocumentType
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType


 def set_correspondent(doc_ids, correspondent):
@@ -40,7 +41,7 @@ def add_tag(doc_ids, tag):
     DocumentTagRelationship = Document.tags.through

     DocumentTagRelationship.objects.bulk_create(
-        [DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs]
+        [DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs],
     )

     async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
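This hunk and most of the remaining ones only append a trailing comma to the last argument of a call that is already split across lines, most likely via the add-trailing-comma pre-commit hook. Combined with black's "magic trailing comma", the comma pins the call in its one-argument-per-line form, so adding another argument later shows up as a single-line diff. A generic sketch, not from the repository:

    documents = ["b.pdf", "a.pdf", "c.pdf"]

    # The trailing comma keeps black from collapsing the call back onto one line.
    result = sorted(
        documents,
        key=str.lower,
        reverse=True,
    )
    print(result)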
@@ -56,7 +57,7 @@ def remove_tag(doc_ids, tag):
     DocumentTagRelationship = Document.tags.through

     DocumentTagRelationship.objects.filter(
-        Q(document_id__in=affected_docs) & Q(tag_id=tag)
+        Q(document_id__in=affected_docs) & Q(tag_id=tag),
     ).delete()

     async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
@@ -1,10 +1,11 @@
 import textwrap

 from django.conf import settings
-from django.core.checks import Error, register
+from django.core.checks import Error
+from django.core.checks import register
 from django.core.exceptions import FieldError
-from django.db.utils import OperationalError, ProgrammingError
+from django.db.utils import OperationalError
+from django.db.utils import ProgrammingError
 from documents.signals import document_consumer_declaration


@@ -16,7 +17,7 @@ def changed_password_check(app_configs, **kwargs):

     try:
         encrypted_doc = Document.objects.filter(
-            storage_type=Document.STORAGE_TYPE_GPG
+            storage_type=Document.STORAGE_TYPE_GPG,
         ).first()
     except (OperationalError, ProgrammingError, FieldError):
         return []  # No documents table yet
@@ -27,8 +28,8 @@ def changed_password_check(app_configs, **kwargs):
             return [
                 Error(
                     "The database contains encrypted documents but no password "
-                    "is set."
-                )
+                    "is set.",
+                ),
             ]

         if not GnuPG.decrypted(encrypted_doc.source_file):
@@ -42,9 +43,9 @@ def changed_password_check(app_configs, **kwargs):
                 If you intend to change your password, you must first export
                 all of the old documents, start fresh with the new password
                 and then re-import them."
-                """
-                    )
-                )
+                """,
+                    ),
+                ),
             ]

     return []
@@ -61,8 +62,8 @@ def parser_check(app_configs, **kwargs):
         return [
             Error(
                 "No parsers found. This is a bug. The consumer won't be "
-                "able to consume any documents without parsers."
-            )
+                "able to consume any documents without parsers.",
+            ),
         ]
     else:
         return []
@@ -6,8 +6,8 @@ import re
 import shutil

 from django.conf import settings
-from documents.models import Document, MatchingModel
+from documents.models import Document
+from documents.models import MatchingModel


 class IncompatibleClassifierVersionError(Exception):
@@ -30,8 +30,8 @@ def preprocess_content(content):
 def load_classifier():
     if not os.path.isfile(settings.MODEL_FILE):
         logger.debug(
-            f"Document classification model does not exist (yet), not "
-            f"performing automatic matching."
+            "Document classification model does not exist (yet), not "
+            "performing automatic matching.",
         )
         return None

@@ -42,16 +42,16 @@ def load_classifier():
     except (ClassifierModelCorruptError, IncompatibleClassifierVersionError):
         # there's something wrong with the model file.
         logger.exception(
-            f"Unrecoverable error while loading document "
-            f"classification model, deleting model file."
+            "Unrecoverable error while loading document "
+            "classification model, deleting model file.",
         )
         os.unlink(settings.MODEL_FILE)
         classifier = None
     except OSError:
-        logger.exception(f"IO error while loading document classification model")
+        logger.exception("IO error while loading document classification model")
         classifier = None
     except Exception:
-        logger.exception(f"Unknown error while loading document classification model")
+        logger.exception("Unknown error while loading document classification model")
         classifier = None

     return classifier
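The f prefixes dropped in this hunk were on strings containing no placeholders at all; flake8 reports those as F541, and a plain string literal logs exactly the same text.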
@@ -78,7 +78,7 @@ class DocumentClassifier(object):

             if schema_version != self.FORMAT_VERSION:
                 raise IncompatibleClassifierVersionError(
-                    "Cannor load classifier, incompatible versions."
+                    "Cannor load classifier, incompatible versions.",
                 )
             else:
                 try:
@@ -122,8 +122,8 @@ class DocumentClassifier(object):
         logger.debug("Gathering data from database...")
         m = hashlib.sha1()
         for doc in Document.objects.order_by("pk").exclude(
-            tags__is_inbox_tag=True
-        ):  # NOQA: E501
+            tags__is_inbox_tag=True,
+        ):
             preprocessed_content = preprocess_content(doc.content)
             m.update(preprocessed_content.encode("utf-8"))
             data.append(preprocessed_content)
@@ -146,9 +146,9 @@ class DocumentClassifier(object):
                 [
                     tag.pk
                     for tag in doc.tags.filter(
-                        matching_algorithm=MatchingModel.MATCH_AUTO
+                        matching_algorithm=MatchingModel.MATCH_AUTO,
                     )
-                ]
+                ],
             )
             for tag in tags:
                 m.update(tag.to_bytes(4, "little", signed=True))
@@ -177,8 +177,11 @@ class DocumentClassifier(object):
         logger.debug(
             "{} documents, {} tag(s), {} correspondent(s), "
             "{} document type(s).".format(
-                len(data), num_tags, num_correspondents, num_document_types
-            )
+                len(data),
+                num_tags,
+                num_correspondents,
+                num_document_types,
+            ),
         )

         from sklearn.feature_extraction.text import CountVectorizer
@@ -188,7 +191,9 @@ class DocumentClassifier(object):
         # Step 2: vectorize data
         logger.debug("Vectorizing data...")
         self.data_vectorizer = CountVectorizer(
-            analyzer="word", ngram_range=(1, 2), min_df=0.01
+            analyzer="word",
+            ngram_range=(1, 2),
+            min_df=0.01,
         )
         data_vectorized = self.data_vectorizer.fit_transform(data)

@@ -204,7 +209,7 @@ class DocumentClassifier(object):
                 ]
                 self.tags_binarizer = LabelBinarizer()
                 labels_tags_vectorized = self.tags_binarizer.fit_transform(
-                    labels_tags
+                    labels_tags,
                 ).ravel()
             else:
                 self.tags_binarizer = MultiLabelBinarizer()
@@ -223,7 +228,8 @@ class DocumentClassifier(object):
         else:
             self.correspondent_classifier = None
             logger.debug(
-                "There are no correspondents. Not training correspondent " "classifier."
+                "There are no correspondents. Not training correspondent "
+                "classifier.",
             )

         if num_document_types > 0:
@@ -233,7 +239,8 @@ class DocumentClassifier(object):
         else:
             self.document_type_classifier = None
             logger.debug(
-                "There are no document types. Not training document type " "classifier."
+                "There are no document types. Not training document type "
+                "classifier.",
             )

         self.data_hash = new_data_hash
@@ -15,11 +15,19 @@ from filelock import FileLock
 from rest_framework.reverse import reverse

 from .classifier import load_classifier
-from .file_handling import create_source_path_directory, generate_unique_filename
+from .file_handling import create_source_path_directory
+from .file_handling import generate_unique_filename
 from .loggers import LoggingMixin
-from .models import Document, FileInfo, Correspondent, DocumentType, Tag
-from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
-from .signals import document_consumption_finished, document_consumption_started
+from .models import Correspondent
+from .models import Document
+from .models import DocumentType
+from .models import FileInfo
+from .models import Tag
+from .parsers import get_parser_class_for_mime_type
+from .parsers import parse_date
+from .parsers import ParseError
+from .signals import document_consumption_finished
+from .signals import document_consumption_started


 class ConsumerError(Exception):
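The import rewrites here and in the earlier hunks follow the one-name-per-line style enforced by the reorder-python-imports hook (assuming that is the hook in use): each "from x import a, b, c" becomes one import per name, sorted, so later additions or removals each touch a single line.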
@@ -46,12 +54,15 @@ class Consumer(LoggingMixin):
     logging_name = "paperless.consumer"

     def _send_progress(
-        self, current_progress, max_progress, status, message=None, document_id=None
+        self,
+        current_progress,
+        max_progress,
+        status,
+        message=None,
+        document_id=None,
     ):
         payload = {
-            "filename": os.path.basename(self.filename)
-            if self.filename
-            else None,  # NOQA: E501
+            "filename": os.path.basename(self.filename) if self.filename else None,
             "task_id": self.task_id,
             "current_progress": current_progress,
             "max_progress": max_progress,
@@ -60,7 +71,8 @@ class Consumer(LoggingMixin):
             "document_id": document_id,
         }
         async_to_sync(self.channel_layer.group_send)(
-            "status_updates", {"type": "status_update", "data": payload}
+            "status_updates",
+            {"type": "status_update", "data": payload},
         )

     def _fail(self, message, log_message=None, exc_info=None):
@@ -83,15 +95,16 @@ class Consumer(LoggingMixin):
     def pre_check_file_exists(self):
         if not os.path.isfile(self.path):
             self._fail(
-                MESSAGE_FILE_NOT_FOUND, f"Cannot consume {self.path}: File not found."
+                MESSAGE_FILE_NOT_FOUND,
+                f"Cannot consume {self.path}: File not found.",
             )

     def pre_check_duplicate(self):
         with open(self.path, "rb") as f:
             checksum = hashlib.md5(f.read()).hexdigest()
         if Document.objects.filter(
-            Q(checksum=checksum) | Q(archive_checksum=checksum)
-        ).exists():  # NOQA: E501
+            Q(checksum=checksum) | Q(archive_checksum=checksum),
+        ).exists():
             if settings.CONSUMER_DELETE_DUPLICATES:
                 os.unlink(self.path)
             self._fail(
@@ -139,7 +152,8 @@ class Consumer(LoggingMixin):
             )

         self.log(
-            "info", f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}"
+            "info",
+            f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}",
         )

         try:
@@ -154,7 +168,7 @@ class Consumer(LoggingMixin):
                     reverse("document-thumb", kwargs={"pk": document.pk}),
                     str(document.correspondent),
                     str(",".join(document.tags.all().values_list("name", flat=True))),
-                )
+                ),
             ).wait()
         except Exception as e:
             self._fail(
@@ -213,7 +227,9 @@ class Consumer(LoggingMixin):
         # Notify all listeners that we're going to do some work.

         document_consumption_started.send(
-            sender=self.__class__, filename=self.path, logging_group=self.logging_group
+            sender=self.__class__,
+            filename=self.path,
+            logging_group=self.logging_group,
         )

         self.run_pre_consume_script()
@@ -247,7 +263,9 @@ class Consumer(LoggingMixin):
             self.log("debug", f"Generating thumbnail for {self.filename}...")
             self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
             thumbnail = document_parser.get_optimised_thumbnail(
-                self.path, mime_type, self.filename
+                self.path,
+                mime_type,
+                self.filename,
             )

             text = document_parser.get_text()
@@ -301,21 +319,26 @@ class Consumer(LoggingMixin):
                     self._write(document.storage_type, self.path, document.source_path)

                     self._write(
-                        document.storage_type, thumbnail, document.thumbnail_path
+                        document.storage_type,
+                        thumbnail,
+                        document.thumbnail_path,
                     )

                     if archive_path and os.path.isfile(archive_path):
                         document.archive_filename = generate_unique_filename(
-                            document, archive_filename=True
+                            document,
+                            archive_filename=True,
                         )
                         create_source_path_directory(document.archive_path)
                         self._write(
-                            document.storage_type, archive_path, document.archive_path
+                            document.storage_type,
+                            archive_path,
+                            document.archive_path,
                         )

                         with open(archive_path, "rb") as f:
                             document.archive_checksum = hashlib.md5(
-                                f.read()
+                                f.read(),
                             ).hexdigest()

                 # Don't save with the lock active. Saving will cause the file
@@ -328,7 +351,8 @@ class Consumer(LoggingMixin):

                 # https://github.com/jonaswinkler/paperless-ng/discussions/1037
                 shadow_file = os.path.join(
-                    os.path.dirname(self.path), "._" + os.path.basename(self.path)
+                    os.path.dirname(self.path),
+                    "._" + os.path.basename(self.path),
                 )

                 if os.path.isfile(shadow_file):
@@ -390,12 +414,12 @@ class Consumer(LoggingMixin):
     def apply_overrides(self, document):
         if self.override_correspondent_id:
             document.correspondent = Correspondent.objects.get(
-                pk=self.override_correspondent_id
+                pk=self.override_correspondent_id,
             )

         if self.override_document_type_id:
             document.document_type = DocumentType.objects.get(
-                pk=self.override_document_type_id
+                pk=self.override_document_type_id,
             )

         if self.override_tag_ids:
@@ -103,15 +103,17 @@ def generate_unique_filename(doc, archive_filename=False):
     if archive_filename and doc.filename:
         new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
         if new_filename == old_filename or not os.path.exists(
-            os.path.join(root, new_filename)
-        ):  # NOQA: E501
+            os.path.join(root, new_filename),
+        ):
             return new_filename

     counter = 0

     while True:
         new_filename = generate_filename(
-            doc, counter, archive_filename=archive_filename
+            doc,
+            counter,
+            archive_filename=archive_filename,
         )
         if new_filename == old_filename:
             # still the same as before.
@@ -137,14 +139,16 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):

             if doc.correspondent:
                 correspondent = pathvalidate.sanitize_filename(
-                    doc.correspondent.name, replacement_text="-"
+                    doc.correspondent.name,
+                    replacement_text="-",
                 )
             else:
                 correspondent = "none"

             if doc.document_type:
                 document_type = pathvalidate.sanitize_filename(
-                    doc.document_type.name, replacement_text="-"
+                    doc.document_type.name,
+                    replacement_text="-",
                 )
             else:
                 document_type = "none"
@@ -160,9 +164,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
                 document_type=document_type,
                 created=datetime.date.isoformat(doc.created),
                 created_year=doc.created.year if doc.created else "none",
-                created_month=f"{doc.created.month:02}"
-                if doc.created
-                else "none",  # NOQA: E501
+                created_month=f"{doc.created.month:02}" if doc.created else "none",
                 created_day=f"{doc.created.day:02}" if doc.created else "none",
                 added=datetime.date.isoformat(doc.added),
                 added_year=doc.added.year if doc.added else "none",
@@ -178,7 +180,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
     except (ValueError, KeyError, IndexError):
         logger.warning(
             f"Invalid PAPERLESS_FILENAME_FORMAT: "
-            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default"
+            f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default",
         )

     counter_str = f"_{counter:02}" if counter else ""
@@ -1,7 +1,13 @@
 from django.db.models import Q
-from django_filters.rest_framework import BooleanFilter, FilterSet, Filter
-from .models import Correspondent, Document, Tag, DocumentType, Log
+from django_filters.rest_framework import BooleanFilter
+from django_filters.rest_framework import Filter
+from django_filters.rest_framework import FilterSet
+
+from .models import Correspondent
+from .models import Document
+from .models import DocumentType
+from .models import Log
+from .models import Tag

 CHAR_KWARGS = ["istartswith", "iendswith", "icontains", "iexact"]
 ID_KWARGS = ["in", "exact"]
@@ -75,7 +81,10 @@ class TitleContentFilter(Filter):
 class DocumentFilterSet(FilterSet):

     is_tagged = BooleanFilter(
-        label="Is tagged", field_name="tags", lookup_expr="isnull", exclude=True
+        label="Is tagged",
+        field_name="tags",
+        lookup_expr="isnull",
+        exclude=True,
     )

     tags__id__all = TagsFilter()
@@ -1,21 +1,30 @@
 import logging
+import math
 import os
 from contextlib import contextmanager

-import math
 from dateutil.parser import isoparse
 from django.conf import settings
-from whoosh import highlight, classify, query
-from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME, BOOLEAN
+from documents.models import Document
+from whoosh import classify
+from whoosh import highlight
+from whoosh import query
+from whoosh.fields import BOOLEAN
+from whoosh.fields import DATETIME
+from whoosh.fields import KEYWORD
+from whoosh.fields import NUMERIC
+from whoosh.fields import Schema
+from whoosh.fields import TEXT
 from whoosh.highlight import HtmlFormatter
-from whoosh.index import create_in, exists_in, open_dir
+from whoosh.index import create_in
+from whoosh.index import exists_in
+from whoosh.index import open_dir
 from whoosh.qparser import MultifieldParser
 from whoosh.qparser.dateparse import DateParserPlugin
-from whoosh.searching import ResultsPage, Searcher
+from whoosh.searching import ResultsPage
+from whoosh.searching import Searcher
 from whoosh.writing import AsyncWriter

-from documents.models import Document
-
 logger = logging.getLogger("paperless.index")


@@ -45,7 +54,7 @@ def open_index(recreate=False):
         if exists_in(settings.INDEX_DIR) and not recreate:
             return open_dir(settings.INDEX_DIR, schema=get_schema())
     except Exception:
-        logger.exception(f"Error while opening the index, recreating.")
+        logger.exception("Error while opening the index, recreating.")

     if not os.path.isdir(settings.INDEX_DIR):
         os.makedirs(settings.INDEX_DIR, exist_ok=True)
@@ -138,11 +147,11 @@ class DelayedQuery:
                 criterias.append(query.Term("has_type", v == "false"))
             elif k == "created__date__lt":
                 criterias.append(
-                    query.DateRange("created", start=None, end=isoparse(v))
+                    query.DateRange("created", start=None, end=isoparse(v)),
                 )
             elif k == "created__date__gt":
                 criterias.append(
-                    query.DateRange("created", start=isoparse(v), end=None)
+                    query.DateRange("created", start=isoparse(v), end=None),
                 )
             elif k == "added__date__gt":
                 criterias.append(query.DateRange("added", start=isoparse(v), end=None))
@@ -220,7 +229,7 @@ class DelayedQuery:
                     hit[1],
                 ),
                 page.results.top_n,
-            )
+            ),
         )

         self.saved_results[item.start] = page
@@ -240,7 +249,7 @@ class DelayedFullTextQuery(DelayedQuery):

         corrected = self.searcher.correct_query(q, q_str)
         if corrected.query != q:
-            corrected_query = corrected.string
+            corrected.query = corrected.string

         return q, None

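This hunk is more than formatting: the old code bound the corrected string to a local variable corrected_query that was never used (flake8 F841), whereas the new code assigns it to corrected.query, so the correction object is actually updated.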
@@ -252,10 +261,14 @@ class DelayedMoreLikeThisQuery(DelayedQuery):

         docnum = self.searcher.document_number(id=more_like_doc_id)
         kts = self.searcher.key_terms_from_text(
-            "content", content, numterms=20, model=classify.Bo1Model, normalize=False
+            "content",
+            content,
+            numterms=20,
+            model=classify.Bo1Model,
+            normalize=False,
         )
         q = query.Or(
-            [query.Term("content", word, boost=weight) for word, weight in kts]
+            [query.Term("content", word, boost=weight) for word, weight in kts],
         )
         mask = {docnum}

@@ -266,7 +279,9 @@ def autocomplete(ix, term, limit=10):
     with ix.reader() as reader:
         terms = []
         for (score, t) in reader.most_distinctive_terms(
-            "content", number=limit, prefix=term.lower()
+            "content",
+            number=limit,
+            prefix=term.lower(),
         ):
             terms.append(t)
         return terms
@@ -1,8 +1,6 @@
 import logging
 import uuid

-from django.conf import settings
-

 class LoggingMixin:

@@ -1,8 +1,8 @@
 import os

 from django.conf import settings
-from django.core.management.base import BaseCommand, CommandError
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
 from documents.models import Document
 from paperless.db import GnuPG

@@ -31,9 +31,9 @@ class Command(BaseCommand):
                 "this unless you've got a recent backup\nWARNING: handy.  It "
                 "*should* work without a hitch, but be safe and backup your\n"
                 "WARNING: stuff first.\n\nHit Ctrl+C to exit now, or Enter to "
-                "continue.\n\n"
+                "continue.\n\n",
             )
-            __ = input()
+            _ = input()
         except KeyboardInterrupt:
             return

@@ -41,7 +41,7 @@ class Command(BaseCommand):
         if not passphrase:
             raise CommandError(
                 "Passphrase not defined.  Please set it with --passphrase or "
-                "by declaring it in your environment or your config."
+                "by declaring it in your environment or your config.",
             )

         self.__gpg_to_unencrypted(passphrase)
@@ -50,7 +50,7 @@ class Command(BaseCommand):
     def __gpg_to_unencrypted(passphrase):

         encrypted_files = Document.objects.filter(
-            storage_type=Document.STORAGE_TYPE_GPG
+            storage_type=Document.STORAGE_TYPE_GPG,
         )

         for document in encrypted_files:
@@ -71,7 +71,7 @@ class Command(BaseCommand):
             if not ext == ".gpg":
                 raise CommandError(
                     f"Abort: encrypted file {document.source_path} does not "
-                    f"end with .gpg"
+                    f"end with .gpg",
                 )

             document.filename = os.path.splitext(document.filename)[0]
@@ -83,7 +83,8 @@ class Command(BaseCommand):
                 f.write(raw_thumb)

             Document.objects.filter(id=document.id).update(
-                storage_type=document.storage_type, filename=document.filename
+                storage_type=document.storage_type,
+                filename=document.filename,
             )

             for path in old_paths:
@@ -1,7 +1,6 @@
 import hashlib
-import multiprocessing
-
 import logging
+import multiprocessing
 import os
 import shutil
 import uuid
@@ -11,12 +10,12 @@ from django import db
 from django.conf import settings
 from django.core.management.base import BaseCommand
 from django.db import transaction
-from filelock import FileLock
-from whoosh.writing import AsyncWriter
-
 from documents.models import Document
+from filelock import FileLock

 from ... import index
-from ...file_handling import create_source_path_directory, generate_unique_filename
+from ...file_handling import create_source_path_directory
+from ...file_handling import generate_unique_filename
 from ...parsers import get_parser_class_for_mime_type


@ -33,7 +32,7 @@ def handle_document(document_id):
 | 
				
			|||||||
    if not parser_class:
 | 
					    if not parser_class:
 | 
				
			||||||
        logger.error(
 | 
					        logger.error(
 | 
				
			||||||
            f"No parser found for mime type {mime_type}, cannot "
 | 
					            f"No parser found for mime type {mime_type}, cannot "
 | 
				
			||||||
            f"archive document {document} (ID: {document_id})"
 | 
					            f"archive document {document} (ID: {document_id})",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        return
 | 
					        return
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -43,7 +42,9 @@ def handle_document(document_id):
 | 
				
			|||||||
        parser.parse(document.source_path, mime_type, document.get_public_filename())
 | 
					        parser.parse(document.source_path, mime_type, document.get_public_filename())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        thumbnail = parser.get_optimised_thumbnail(
 | 
					        thumbnail = parser.get_optimised_thumbnail(
 | 
				
			||||||
            document.source_path, mime_type, document.get_public_filename()
 | 
					            document.source_path,
 | 
				
			||||||
 | 
					            mime_type,
 | 
				
			||||||
 | 
					            document.get_public_filename(),
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if parser.get_archive_path():
 | 
					        if parser.get_archive_path():
 | 
				
			||||||
@ -55,7 +56,8 @@ def handle_document(document_id):
 | 
				
			|||||||
                # We also don't use save() since that triggers the filehandling
 | 
					                # We also don't use save() since that triggers the filehandling
 | 
				
			||||||
                # logic, and we don't want that yet (file not yet in place)
 | 
					                # logic, and we don't want that yet (file not yet in place)
 | 
				
			||||||
                document.archive_filename = generate_unique_filename(
 | 
					                document.archive_filename = generate_unique_filename(
 | 
				
			||||||
                    document, archive_filename=True
 | 
					                    document,
 | 
				
			||||||
 | 
					                    archive_filename=True,
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
                Document.objects.filter(pk=document.pk).update(
 | 
					                Document.objects.filter(pk=document.pk).update(
 | 
				
			||||||
                    archive_checksum=checksum,
 | 
					                    archive_checksum=checksum,
 | 
				
			||||||
@ -70,9 +72,9 @@ def handle_document(document_id):
 | 
				
			|||||||
            with index.open_index_writer() as writer:
 | 
					            with index.open_index_writer() as writer:
 | 
				
			||||||
                index.update_document(writer, document)
 | 
					                index.update_document(writer, document)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    except Exception as e:
 | 
					    except Exception:
 | 
				
			||||||
        logger.exception(
 | 
					        logger.exception(
 | 
				
			||||||
            f"Error while parsing document {document} " f"(ID: {document_id})"
 | 
					            f"Error while parsing document {document} " f"(ID: {document_id})",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
    finally:
 | 
					    finally:
 | 
				
			||||||
        parser.cleanup()
 | 
					        parser.cleanup()
 | 
				
			||||||
@ -86,7 +88,8 @@ class Command(BaseCommand):
 | 
				
			|||||||
        back-tag all previously indexed documents with metadata created (or
 | 
					        back-tag all previously indexed documents with metadata created (or
 | 
				
			||||||
        modified) after their initial import.
 | 
					        modified) after their initial import.
 | 
				
			||||||
    """.replace(
 | 
					    """.replace(
 | 
				
			||||||
        "    ", ""
 | 
					        "    ",
 | 
				
			||||||
 | 
					        "",
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def add_arguments(self, parser):
 | 
					    def add_arguments(self, parser):
 | 
				
			||||||
@ -129,7 +132,7 @@ class Command(BaseCommand):
 | 
				
			|||||||
            map(
 | 
					            map(
 | 
				
			||||||
                lambda doc: doc.id,
 | 
					                lambda doc: doc.id,
 | 
				
			||||||
                filter(lambda d: overwrite or not d.has_archive_version, documents),
 | 
					                filter(lambda d: overwrite or not d.has_archive_version, documents),
 | 
				
			||||||
            )
 | 
					            ),
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Note to future self: this prevents django from reusing database
 | 
					        # Note to future self: this prevents django from reusing database
 | 
				
			||||||
@ -146,7 +149,7 @@ class Command(BaseCommand):
 | 
				
			|||||||
                        pool.imap_unordered(handle_document, document_ids),
 | 
					                        pool.imap_unordered(handle_document, document_ids),
 | 
				
			||||||
                        total=len(document_ids),
 | 
					                        total=len(document_ids),
 | 
				
			||||||
                        disable=options["no_progress_bar"],
 | 
					                        disable=options["no_progress_bar"],
 | 
				
			||||||
                    )
 | 
					                    ),
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
        except KeyboardInterrupt:
 | 
					        except KeyboardInterrupt:
 | 
				
			||||||
            print("Aborting...")
 | 
					            print("Aborting...")
 | 
				
			||||||
 | 
				
			|||||||
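The import churn in the hunks above follows the one-name-per-line, alphabetically sorted style that a reorder-imports pre-commit hook produces; a minimal before/after sketch of that convention (illustrative only, not a line taken from this commit):

    # before: several names grouped on one line
    from documents.models import Tag, Correspondent

    # after: one import per line, sorted, so a later addition touches a single line
    from documents.models import Correspondent
    from documents.models import Tag
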
@@ -1,17 +1,18 @@
 import logging
 import os
-from pathlib import Path, PurePath
+from pathlib import Path
+from pathlib import PurePath
 from threading import Thread
 from time import sleep

 from django.conf import settings
-from django.core.management.base import BaseCommand, CommandError
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
 from django_q.tasks import async_task
-from watchdog.events import FileSystemEventHandler
-from watchdog.observers.polling import PollingObserver
-
 from documents.models import Tag
 from documents.parsers import is_file_ext_supported
+from watchdog.events import FileSystemEventHandler
+from watchdog.observers.polling import PollingObserver

 try:
     from inotifyrecursive import INotify, flags
@@ -29,7 +30,7 @@ def _tags_from_path(filepath):
     path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts
     for part in path_parts:
         tag_ids.add(
-            Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk
+            Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk,
         )

     return tag_ids
@@ -56,7 +57,7 @@ def _consume(filepath):
     try:
         if settings.CONSUMER_SUBDIRS_AS_TAGS:
             tag_ids = _tags_from_path(filepath)
-    except Exception as e:
+    except Exception:
         logger.exception("Error creating tags from path")

     try:
@@ -67,7 +68,7 @@ def _consume(filepath):
             override_tag_ids=tag_ids if tag_ids else None,
             task_name=os.path.basename(filepath)[:100],
         )
-    except Exception as e:
+    except Exception:
         # Catch all so that the consumer won't crash.
         # This is also what the test case is listening for to check for
         # errors.
@@ -86,7 +87,7 @@ def _consume_wait_unmodified(file):
             new_mtime = os.stat(file).st_mtime
         except FileNotFoundError:
             logger.debug(
-                f"File {file} moved while waiting for it to remain " f"unmodified."
+                f"File {file} moved while waiting for it to remain " f"unmodified.",
             )
             return
         if new_mtime == mtime:

@@ -9,7 +9,8 @@ class Command(BaseCommand):
         Trains the classifier on your data and saves the resulting models to a
         file. The document consumer will then automatically use this new model.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )

     def __init__(self, *args, **kwargs):

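Several of these management commands keep their --help text in a triple-quoted string and strip the source indentation with the replace() call reformatted above; a small sketch of what that pattern does (illustrative, not copied from the diff):

    help = """
        Trains the classifier on your data.
    """.replace(
        "    ",
        "",
    )
    # every run of four spaces is removed, so the CLI help prints flush-left
    # instead of echoing the indentation of the Python source
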
@@ -6,28 +6,28 @@ import time

 import tqdm
 from django.conf import settings
-from django.contrib.auth.models import User, Group
+from django.contrib.auth.models import Group
+from django.contrib.auth.models import User
 from django.core import serializers
-from django.core.management.base import BaseCommand, CommandError
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
 from django.db import transaction
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import SavedView
+from documents.models import SavedViewFilterRule
+from documents.models import Tag
+from documents.settings import EXPORTER_ARCHIVE_NAME
+from documents.settings import EXPORTER_FILE_NAME
+from documents.settings import EXPORTER_THUMBNAIL_NAME
 from filelock import FileLock
-
-from documents.models import (
-    Document,
-    Correspondent,
-    Tag,
-    DocumentType,
-    SavedView,
-    SavedViewFilterRule,
-)
-from documents.settings import (
-    EXPORTER_FILE_NAME,
-    EXPORTER_THUMBNAIL_NAME,
-    EXPORTER_ARCHIVE_NAME,
-)
 from paperless.db import GnuPG
-from paperless_mail.models import MailAccount, MailRule
-from ...file_handling import generate_filename, delete_empty_directories
+from paperless_mail.models import MailAccount
+from paperless_mail.models import MailRule
+
+from ...file_handling import delete_empty_directories
+from ...file_handling import generate_filename


 class Command(BaseCommand):
@@ -37,7 +37,8 @@ class Command(BaseCommand):
         directory.  And include a manifest file containing document data for
         easy import.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )

     def add_arguments(self, parser):
@@ -107,20 +108,20 @@ class Command(BaseCommand):
         # 1. Take a snapshot of what files exist in the current export folder
         for root, dirs, files in os.walk(self.target):
             self.files_in_export_dir.extend(
-                map(lambda f: os.path.abspath(os.path.join(root, f)), files)
+                map(lambda f: os.path.abspath(os.path.join(root, f)), files),
             )

         # 2. Create manifest, containing all correspondents, types, tags and
         # documents
         with transaction.atomic():
             manifest = json.loads(
-                serializers.serialize("json", Correspondent.objects.all())
+                serializers.serialize("json", Correspondent.objects.all()),
             )

             manifest += json.loads(serializers.serialize("json", Tag.objects.all()))

             manifest += json.loads(
-                serializers.serialize("json", DocumentType.objects.all())
+                serializers.serialize("json", DocumentType.objects.all()),
             )

             documents = Document.objects.order_by("id")
@@ -129,19 +130,19 @@ class Command(BaseCommand):
             manifest += document_manifest

             manifest += json.loads(
-                serializers.serialize("json", MailAccount.objects.all())
+                serializers.serialize("json", MailAccount.objects.all()),
             )

             manifest += json.loads(
-                serializers.serialize("json", MailRule.objects.all())
+                serializers.serialize("json", MailRule.objects.all()),
             )

             manifest += json.loads(
-                serializers.serialize("json", SavedView.objects.all())
+                serializers.serialize("json", SavedView.objects.all()),
             )

             manifest += json.loads(
-                serializers.serialize("json", SavedViewFilterRule.objects.all())
+                serializers.serialize("json", SavedViewFilterRule.objects.all()),
             )

             manifest += json.loads(serializers.serialize("json", Group.objects.all()))
@@ -155,9 +156,7 @@ class Command(BaseCommand):
             disable=progress_bar_disable,
         ):
             # 3.1. store files unencrypted
-            document_dict["fields"][
-                "storage_type"
-            ] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501
+            document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED

             document = document_map[document_dict["pk"]]

@@ -166,7 +165,9 @@ class Command(BaseCommand):
             while True:
                 if self.use_filename_format:
                     base_name = generate_filename(
-                        document, counter=filename_counter, append_gpg=False
+                        document,
+                        counter=filename_counter,
+                        append_gpg=False,
                     )
                 else:
                     base_name = document.get_public_filename(counter=filename_counter)
@@ -217,14 +218,18 @@ class Command(BaseCommand):
                             os.utime(archive_target, times=(t, t))
             else:
                 self.check_and_copy(
-                    document.source_path, document.checksum, original_target
+                    document.source_path,
+                    document.checksum,
+                    original_target,
                 )

                 self.check_and_copy(document.thumbnail_path, None, thumbnail_target)

                 if archive_target:
                     self.check_and_copy(
-                        document.archive_path, document.archive_checksum, archive_target
+                        document.archive_path,
+                        document.archive_checksum,
+                        archive_target,
                     )

         # 4. write manifest to target forlder
@@ -243,7 +248,8 @@ class Command(BaseCommand):
                 os.remove(f)

                 delete_empty_directories(
-                    os.path.abspath(os.path.dirname(f)), os.path.abspath(self.target)
+                    os.path.abspath(os.path.dirname(f)),
+                    os.path.abspath(self.target),
                 )

     def check_and_copy(self, source, source_checksum, target):

@@ -7,16 +7,16 @@ from contextlib import contextmanager
 import tqdm
 from django.conf import settings
 from django.core.management import call_command
-from django.core.management.base import BaseCommand, CommandError
-from django.db.models.signals import post_save, m2m_changed
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
+from django.db.models.signals import m2m_changed
+from django.db.models.signals import post_save
+from documents.models import Document
+from documents.settings import EXPORTER_ARCHIVE_NAME
+from documents.settings import EXPORTER_FILE_NAME
+from documents.settings import EXPORTER_THUMBNAIL_NAME
 from filelock import FileLock

-from documents.models import Document
-from documents.settings import (
-    EXPORTER_FILE_NAME,
-    EXPORTER_THUMBNAIL_NAME,
-    EXPORTER_ARCHIVE_NAME,
-)
 from ...file_handling import create_source_path_directory
 from ...signals.handlers import update_filename_and_move_files

@@ -36,7 +36,8 @@ class Command(BaseCommand):
         Using a manifest.json file, load the data from there, and import the
         documents it refers to.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )

     def add_arguments(self, parser):
@@ -73,7 +74,9 @@ class Command(BaseCommand):

         self._check_manifest()
         with disable_signal(
-            post_save, receiver=update_filename_and_move_files, sender=Document
+            post_save,
+            receiver=update_filename_and_move_files,
+            sender=Document,
         ):
             with disable_signal(
                 m2m_changed,
@@ -92,7 +95,7 @@ class Command(BaseCommand):
     def _check_manifest_exists(path):
         if not os.path.exists(path):
             raise CommandError(
-                "That directory doesn't appear to contain a manifest.json " "file."
+                "That directory doesn't appear to contain a manifest.json " "file.",
             )

     def _check_manifest(self):
@@ -105,14 +108,14 @@ class Command(BaseCommand):
             if EXPORTER_FILE_NAME not in record:
                 raise CommandError(
                     "The manifest file contains a record which does not "
-                    "refer to an actual document file."
+                    "refer to an actual document file.",
                 )

             doc_file = record[EXPORTER_FILE_NAME]
             if not os.path.exists(os.path.join(self.source, doc_file)):
                 raise CommandError(
                     'The manifest file refers to "{}" which does not '
-                    "appear to be in the source directory.".format(doc_file)
+                    "appear to be in the source directory.".format(doc_file),
                 )

             if EXPORTER_ARCHIVE_NAME in record:
@@ -120,7 +123,7 @@ class Command(BaseCommand):
                 if not os.path.exists(os.path.join(self.source, archive_file)):
                     raise CommandError(
                         f"The manifest file refers to {archive_file} which "
-                        f"does not appear to be in the source directory."
+                        f"does not appear to be in the source directory.",
                     )

     def _import_files_from_manifest(self, progress_bar_disable):
@@ -132,7 +135,7 @@ class Command(BaseCommand):
         print("Copy files into paperless...")

         manifest_documents = list(
-            filter(lambda r: r["model"] == "documents.document", self.manifest)
+            filter(lambda r: r["model"] == "documents.document", self.manifest),
         )

         for record in tqdm.tqdm(manifest_documents, disable=progress_bar_disable):

@@ -1,7 +1,7 @@
 from django.core.management import BaseCommand
 from django.db import transaction
-from documents.tasks import index_reindex, index_optimize
+from documents.tasks import index_optimize
+from documents.tasks import index_reindex


 class Command(BaseCommand):

@@ -3,7 +3,6 @@ import logging
 import tqdm
 from django.core.management.base import BaseCommand
 from django.db.models.signals import post_save
-
 from documents.models import Document


@@ -12,7 +11,8 @@ class Command(BaseCommand):
     help = """
         This will rename all documents to match the latest filename format.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )

     def add_arguments(self, parser):
@@ -28,6 +28,7 @@ class Command(BaseCommand):
         logging.getLogger().handlers[0].level = logging.ERROR

         for document in tqdm.tqdm(
-            Document.objects.all(), disable=options["no_progress_bar"]
+            Document.objects.all(),
+            disable=options["no_progress_bar"],
         ):
             post_save.send(Document, instance=document)

@@ -2,10 +2,12 @@ import logging

 import tqdm
 from django.core.management.base import BaseCommand
-
 from documents.classifier import load_classifier
 from documents.models import Document
-from ...signals.handlers import set_correspondent, set_document_type, set_tags
+
+from ...signals.handlers import set_correspondent
+from ...signals.handlers import set_document_type
+from ...signals.handlers import set_tags


 logger = logging.getLogger("paperless.management.retagger")
@@ -19,7 +21,8 @@ class Command(BaseCommand):
         back-tag all previously indexed documents with metadata created (or
         modified) after their initial import.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )

     def add_arguments(self, parser):
@@ -57,7 +60,8 @@ class Command(BaseCommand):
             help="Return the suggestion, don't change anything.",
         )
         parser.add_argument(
-            "--base-url", help="The base URL to use to build the link to the documents."
+            "--base-url",
+            help="The base URL to use to build the link to the documents.",
         )

     def handle(self, *args, **options):

@@ -7,7 +7,8 @@ class Command(BaseCommand):
     help = """
         This command checks your document archive for issues.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )

     def add_arguments(self, parser):

@@ -5,8 +5,8 @@ import shutil
 import tqdm
 from django import db
 from django.core.management.base import BaseCommand
-
 from documents.models import Document

 from ...parsers import get_parser_class_for_mime_type


@@ -22,7 +22,9 @@ def _process_document(doc_in):

     try:
         thumb = parser.get_optimised_thumbnail(
-            document.source_path, document.mime_type, document.get_public_filename()
+            document.source_path,
+            document.mime_type,
+            document.get_public_filename(),
         )

         shutil.move(thumb, document.thumbnail_path)
@@ -35,7 +37,8 @@ class Command(BaseCommand):
     help = """
         This will regenerate the thumbnails for all documents.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )

     def add_arguments(self, parser):
@@ -76,5 +79,5 @@ class Command(BaseCommand):
                     pool.imap_unordered(_process_document, ids),
                     total=len(ids),
                     disable=options["no_progress_bar"],
-                )
+                ),
             )

@@ -2,7 +2,7 @@ import logging
 import os

 from django.contrib.auth.models import User
-from django.core.management.base import BaseCommand, CommandError
+from django.core.management.base import BaseCommand


 logger = logging.getLogger("paperless.management.superuser")
@@ -13,7 +13,8 @@ class Command(BaseCommand):
     help = """
         Creates a Django superuser based on env variables.
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )

     def handle(self, *args, **options):
@@ -39,5 +40,5 @@ class Command(BaseCommand):
             self.stdout.write(f'Did not create superuser "{username}".')
             self.stdout.write(
                 'Make sure you specified "PAPERLESS_ADMIN_PASSWORD" in your '
-                '"docker-compose.env" file.'
+                '"docker-compose.env" file.',
             )

@@ -1,8 +1,10 @@
 import logging
 import re

-from documents.models import MatchingModel, Correspondent, DocumentType, Tag
+from documents.models import Correspondent
+from documents.models import DocumentType
+from documents.models import MatchingModel
+from documents.models import Tag


 logger = logging.getLogger("paperless.matching")
@@ -12,7 +14,7 @@ def log_reason(matching_model, document, reason):
     class_name = type(matching_model).__name__
     logger.debug(
         f"{class_name} {matching_model.name} matched on document "
-        f"{document} because {reason}"
+        f"{document} because {reason}",
     )


@@ -25,7 +27,7 @@ def match_correspondents(document, classifier):
     correspondents = Correspondent.objects.all()

     return list(
-        filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents)
+        filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents),
     )


@@ -38,7 +40,7 @@ def match_document_types(document, classifier):
     document_types = DocumentType.objects.all()

     return list(
-        filter(lambda o: matches(o, document) or o.pk == pred_id, document_types)
+        filter(lambda o: matches(o, document) or o.pk == pred_id, document_types),
     )


@@ -51,7 +53,7 @@ def match_tags(document, classifier):
     tags = Tag.objects.all()

     return list(
-        filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags)
+        filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags),
     )


@@ -92,7 +94,7 @@ def matches(matching_model, document):
                 rf"\b{re.escape(matching_model.match)}\b",
                 document_content,
                 **search_kwargs,
-            )
+            ),
         )
         if result:
             log_reason(
@@ -105,11 +107,12 @@ def matches(matching_model, document):
     elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
         try:
             match = re.search(
-                re.compile(matching_model.match, **search_kwargs), document_content
+                re.compile(matching_model.match, **search_kwargs),
+                document_content,
             )
         except re.error:
             logger.error(
-                f"Error while processing regular expression " f"{matching_model.match}"
+                f"Error while processing regular expression " f"{matching_model.match}",
             )
             return False
         if match:

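The change repeated throughout these hunks is a trailing comma after the last element of a multi-line call or literal, most likely produced by an add-trailing-comma style hook; Black treats that "magic" trailing comma as a signal to keep the call expanded one argument per line, so a later argument shows up as a one-line diff. A sketch of the resulting style, reusing a call from the hunks above:

    thumbnail = parser.get_optimised_thumbnail(
        document.source_path,            # one argument per line
        mime_type,
        document.get_public_filename(),  # trailing comma keeps this layout stable
    )
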
@@ -5,17 +5,14 @@ import os
 import re
 from collections import OrderedDict

-import pathvalidate
-
 import dateutil.parser
+import pathvalidate
 from django.conf import settings
 from django.contrib.auth.models import User
 from django.db import models
 from django.utils import timezone
 from django.utils.timezone import is_aware
-
 from django.utils.translation import gettext_lazy as _
-
 from documents.parsers import get_default_file_extension


@@ -42,7 +39,9 @@ class MatchingModel(models.Model):
     match = models.CharField(_("match"), max_length=256, blank=True)

     matching_algorithm = models.PositiveIntegerField(
-        _("matching algorithm"), choices=MATCHING_ALGORITHMS, default=MATCH_ANY
+        _("matching algorithm"),
+        choices=MATCHING_ALGORITHMS,
+        default=MATCH_ANY,
     )

     is_insensitive = models.BooleanField(_("is insensitive"), default=True)
@@ -71,7 +70,7 @@ class Tag(MatchingModel):
         default=False,
         help_text=_(
             "Marks this tag as an inbox tag: All newly consumed "
-            "documents will be tagged with inbox tags."
+            "documents will be tagged with inbox tags.",
         ),
     )

@@ -120,14 +119,17 @@ class Document(models.Model):
         blank=True,
         help_text=_(
             "The raw, text-only data of the document. This field is "
-            "primarily used for searching."
+            "primarily used for searching.",
         ),
     )

     mime_type = models.CharField(_("mime type"), max_length=256, editable=False)

     tags = models.ManyToManyField(
-        Tag, related_name="documents", blank=True, verbose_name=_("tags")
+        Tag,
+        related_name="documents",
+        blank=True,
+        verbose_name=_("tags"),
     )

     checksum = models.CharField(
@@ -150,7 +152,10 @@ class Document(models.Model):
     created = models.DateTimeField(_("created"), default=timezone.now, db_index=True)

     modified = models.DateTimeField(
-        _("modified"), auto_now=True, editable=False, db_index=True
+        _("modified"),
+        auto_now=True,
+        editable=False,
+        db_index=True,
     )

     storage_type = models.CharField(
@@ -162,7 +167,10 @@ class Document(models.Model):
     )

     added = models.DateTimeField(
-        _("added"), default=timezone.now, editable=False, db_index=True
+        _("added"),
+        default=timezone.now,
+        editable=False,
+        db_index=True,
     )

     filename = models.FilePathField(
@@ -192,7 +200,7 @@ class Document(models.Model):
         unique=True,
         db_index=True,
         help_text=_(
-            "The position of this document in your physical document " "archive."
+            "The position of this document in your physical document " "archive.",
         ),
     )

@@ -289,7 +297,9 @@ class Log(models.Model):
     message = models.TextField(_("message"))

     level = models.PositiveIntegerField(
-        _("level"), choices=LEVELS, default=logging.INFO
+        _("level"),
+        choices=LEVELS,
+        default=logging.INFO,
     )

     created = models.DateTimeField(_("created"), auto_now_add=True)
@@ -321,7 +331,10 @@ class SavedView(models.Model):
     )

     sort_field = models.CharField(
-        _("sort field"), max_length=128, null=True, blank=True
+        _("sort field"),
+        max_length=128,
+        null=True,
+        blank=True,
     )
     sort_reverse = models.BooleanField(_("sort reverse"), default=False)

@@ -383,11 +396,16 @@ class FileInfo:
                 ),
             ),
             ("title", re.compile(r"(?P<title>.*)$", flags=re.IGNORECASE)),
-        ]
+        ],
     )

     def __init__(
-        self, created=None, correspondent=None, title=None, tags=(), extension=None
+        self,
+        created=None,
+        correspondent=None,
+        title=None,
+        tags=(),
+        extension=None,
     ):

         self.created = created

@@ -9,6 +9,8 @@ import tempfile
 import magic
 from django.conf import settings
 from django.utils import timezone
+from documents.loggers import LoggingMixin
+from documents.signals import document_consumer_declaration
 
 # This regular expression will try to find dates in the document at
 # hand and will match the following formats:
@@ -21,17 +23,15 @@ from django.utils import timezone
 # - XX. MONTH ZZZZ with XX being 1 or 2 and ZZZZ being 2 or 4 digits
 # - MONTH ZZZZ, with ZZZZ being 4 digits
 # - MONTH XX, ZZZZ with XX being 1 or 2 and ZZZZ being 4 digits
-from documents.loggers import LoggingMixin
-from documents.signals import document_consumer_declaration
 
 # TODO: isnt there a date parsing library for this?
 
 DATE_REGEX = re.compile(
-    r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|"  # NOQA: E501
-    r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|"  # NOQA: E501
-    r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|"  # NOQA: E501
+    r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|"  # noqa: E501
+    r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|"  # noqa: E501
+    r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|"  # noqa: E501
     r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|"
-    r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))"
+    r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))",
 )
 
 
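Note (illustrative addition): a quick sketch of how a pattern like DATE_REGEX is used to pull candidate dates out of document text. The snippet uses a simplified stand-in covering only the first DD.MM.YYYY style alternative, so it is an assumption-laden approximation, not the parser's actual code:

import re

# Simplified stand-in for the first alternative of DATE_REGEX above
# (day/month/year separated by ".", "/" or "-").
SIMPLE_DATE = re.compile(
    r"\b([0-9]{1,2})[\./-]([0-9]{1,2})[\./-]([0-9]{4}|[0-9]{2})\b",
)

text = "Rechnung vom 03.02.2022, faellig am 17/03/2022"
print([m.group(0) for m in SIMPLE_DATE.finditer(text)])
# -> ['03.02.2022', '17/03/2022']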
@@ -3,9 +3,8 @@ import logging
 import os
 
 from django.conf import settings
-from tqdm import tqdm
-
 from documents.models import Document
+from tqdm import tqdm
 
 
 class SanityCheckMessages:
@@ -88,19 +87,19 @@ def check_sanity(progress=False):
                 if not checksum == doc.checksum:
                     messages.error(
                         f"Checksum mismatch of document {doc.pk}. "
-                        f"Stored: {doc.checksum}, actual: {checksum}."
+                        f"Stored: {doc.checksum}, actual: {checksum}.",
                     )
 
         # Check sanity of the archive file.
         if doc.archive_checksum and not doc.archive_filename:
             messages.error(
                 f"Document {doc.pk} has an archive file checksum, but no "
-                f"archive filename."
+                f"archive filename.",
             )
         elif not doc.archive_checksum and doc.archive_filename:
             messages.error(
                 f"Document {doc.pk} has an archive file, but its checksum is "
-                f"missing."
+                f"missing.",
             )
         elif doc.has_archive_version:
             if not os.path.isfile(doc.archive_path):
@@ -113,7 +112,7 @@ def check_sanity(progress=False):
                         checksum = hashlib.md5(f.read()).hexdigest()
                 except OSError as e:
                     messages.error(
-                        f"Cannot read archive file of document {doc.pk}: {e}"
+                        f"Cannot read archive file of document {doc.pk}: {e}",
                     )
                 else:
                     if not checksum == doc.archive_checksum:
@@ -121,7 +120,7 @@ def check_sanity(progress=False):
                             f"Checksum mismatch of archived document "
                             f"{doc.pk}. "
                             f"Stored: {doc.archive_checksum}, "
-                            f"actual: {checksum}."
+                            f"actual: {checksum}.",
                         )
 
         # other document checks
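Note (illustrative addition): the messages above come from comparing a stored MD5 against a freshly computed one. A minimal, self-contained sketch of that check, using a temporary file in place of a real Document and its source_path:

import hashlib
import tempfile

# Hypothetical stand-in for doc.checksum / doc.source_path.
with tempfile.NamedTemporaryFile(delete=False) as tmp:
    tmp.write(b"hello paperless")
    path = tmp.name

stored_checksum = hashlib.md5(b"hello paperless").hexdigest()

with open(path, "rb") as f:
    checksum = hashlib.md5(f.read()).hexdigest()

if not checksum == stored_checksum:
    print(f"Checksum mismatch. Stored: {stored_checksum}, actual: {checksum}.")
else:
    print("Checksum OK")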
@@ -1,25 +1,22 @@
+import math
 import re
 
 import magic
-import math
 from django.utils.text import slugify
+from django.utils.translation import gettext as _
 from rest_framework import serializers
 from rest_framework.fields import SerializerMethodField
 
 from . import bulk_edit
-from .models import (
-    Correspondent,
-    Tag,
-    Document,
-    DocumentType,
-    SavedView,
-    SavedViewFilterRule,
-    MatchingModel,
-)
+from .models import Correspondent
+from .models import Document
+from .models import DocumentType
+from .models import MatchingModel
+from .models import SavedView
+from .models import SavedViewFilterRule
+from .models import Tag
 from .parsers import is_mime_type_supported
 
-from django.utils.translation import gettext as _
-
 
 # https://www.django-rest-framework.org/api-guide/serializers/#example
 class DynamicFieldsModelSerializer(serializers.ModelSerializer):
@@ -56,12 +53,12 @@ class MatchingModelSerializer(serializers.ModelSerializer):
         if (
             "matching_algorithm" in self.initial_data
             and self.initial_data["matching_algorithm"] == MatchingModel.MATCH_REGEX
-        ):  # NOQA: E501
+        ):
             try:
                 re.compile(match)
-            except Exception as e:
+            except re.error as e:
                 raise serializers.ValidationError(
-                    _("Invalid regular expression: %(error)s") % {"error": str(e)}
+                    _("Invalid regular expression: %(error)s") % {"error": str(e.msg)},
                 )
         return match
 
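Note (illustrative addition): narrowing the broad except Exception to except re.error means only genuine pattern-compilation failures are turned into a validation error; anything else keeps propagating. A standalone sketch of the same pattern outside the serializer, with ValueError standing in for DRF's ValidationError:

import re

def validate_match(match: str) -> str:
    # Only a bad pattern should become a user-facing validation error.
    try:
        re.compile(match)
    except re.error as e:
        # e.msg carries the human-readable reason for the failure.
        raise ValueError(f"Invalid regular expression: {e.msg}") from e
    return match

print(validate_match(r"\d{4}-\d{2}"))  # valid pattern is returned unchanged
try:
    validate_match("([0-9]")           # unbalanced parenthesis
except ValueError as err:
    print(err)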
@@ -156,7 +153,7 @@ class TagSerializer(MatchingModelSerializer):
             luminance = math.sqrt(
                 0.299 * math.pow(rgb[0], 2)
                 + 0.587 * math.pow(rgb[1], 2)
-                + 0.114 * math.pow(rgb[2], 2)
+                + 0.114 * math.pow(rgb[2], 2),
             )
             return "#ffffff" if luminance < 0.53 else "#000000"
         except ValueError:
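Note (illustrative addition): the expression above is a perceived-luminance estimate used to pick a readable text colour for a tag. A standalone sketch, assuming RGB components already normalised to the 0..1 range (how the serializer derives them is not shown in this hunk):

import math

def text_color(rgb):
    # Weighted RGB luminance; dark tag colours get white text, light ones black.
    luminance = math.sqrt(
        0.299 * math.pow(rgb[0], 2)
        + 0.587 * math.pow(rgb[1], 2)
        + 0.114 * math.pow(rgb[2], 2),
    )
    return "#ffffff" if luminance < 0.53 else "#000000"

print(text_color((0.12, 0.29, 0.49)))  # dark blue -> #ffffff
print(text_color((0.95, 0.87, 0.30)))  # light yellow -> #000000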
@@ -298,7 +295,7 @@ class DocumentListSerializer(serializers.Serializer):
         count = Document.objects.filter(id__in=documents).count()
         if not count == len(documents):
             raise serializers.ValidationError(
-                f"Some documents in {name} don't exist or were " f"specified twice."
+                f"Some documents in {name} don't exist or were " f"specified twice.",
             )
 
     def validate_documents(self, documents):
@@ -331,7 +328,7 @@ class BulkEditSerializer(DocumentListSerializer):
         count = Tag.objects.filter(id__in=tags).count()
         if not count == len(tags):
             raise serializers.ValidationError(
-                f"Some tags in {name} don't exist or were specified twice."
+                f"Some tags in {name} don't exist or were specified twice.",
             )
 
     def validate_method(self, method):
@@ -456,7 +453,7 @@ class PostDocumentSerializer(serializers.Serializer):
 
         if not is_mime_type_supported(mime_type):
             raise serializers.ValidationError(
-                _("File type %(type)s not supported") % {"type": mime_type}
+                _("File type %(type)s not supported") % {"type": mime_type},
             )
 
         return document.name, document_data
@@ -483,11 +480,13 @@ class PostDocumentSerializer(serializers.Serializer):
 class BulkDownloadSerializer(DocumentListSerializer):
 
     content = serializers.ChoiceField(
-        choices=["archive", "originals", "both"], default="archive"
+        choices=["archive", "originals", "both"],
+        default="archive",
     )
 
     compression = serializers.ChoiceField(
-        choices=["none", "deflated", "bzip2", "lzma"], default="none"
+        choices=["none", "deflated", "bzip2", "lzma"],
+        default="none",
     )
 
     def validate_compression(self, compression):
@@ -1,24 +1,26 @@
 import logging
 import os
 
-from django.utils import termcolors
 from django.conf import settings
-from django.contrib.admin.models import ADDITION, LogEntry
+from django.contrib.admin.models import ADDITION
+from django.contrib.admin.models import LogEntry
 from django.contrib.auth.models import User
 from django.contrib.contenttypes.models import ContentType
-from django.db import models, DatabaseError
+from django.db import DatabaseError
+from django.db import models
 from django.db.models import Q
 from django.dispatch import receiver
-from django.utils import termcolors, timezone
+from django.utils import termcolors
+from django.utils import timezone
 from filelock import FileLock
 
 from .. import matching
-from ..file_handling import (
-    delete_empty_directories,
-    create_source_path_directory,
-    generate_unique_filename,
-)
-from ..models import Document, Tag, MatchingModel
+from ..file_handling import create_source_path_directory
+from ..file_handling import delete_empty_directories
+from ..file_handling import generate_unique_filename
+from ..models import Document
+from ..models import MatchingModel
+from ..models import Tag
 
 
 logger = logging.getLogger("paperless.handlers")
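Note (illustrative addition): the import blocks in this commit are rewritten so every imported name gets its own from-import line, sorted alphabetically, which keeps future additions and removals to one-line diffs. A tiny sketch of the style on standard-library modules; these particular imports are only an example, not paperless code:

# One name per line, sorted: adding another name later is a one-line change.
from collections import OrderedDict
from collections import defaultdict
from os.path import basename
from os.path import dirname

print(basename("/tmp/example.pdf"), dirname("/tmp/example.pdf"))
print(OrderedDict(), defaultdict(int)["missing"])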
@@ -72,7 +74,7 @@ def set_correspondent(
                 print(
                     termcolors.colorize(str(document), fg="green")
                     if color
-                    else str(document)
+                    else str(document),
                 )
                 print(f"{base_url}/documents/{document.pk}")
             else:
@@ -82,7 +84,7 @@ def set_correspondent(
                         if color
                         else str(document)
                     )
-                    + f" [{document.pk}]"
+                    + f" [{document.pk}]",
                 )
             print(f"Suggest correspondent {selected}")
         else:
@@ -139,7 +141,7 @@ def set_document_type(
                 print(
                     termcolors.colorize(str(document), fg="green")
                     if color
-                    else str(document)
+                    else str(document),
                 )
                 print(f"{base_url}/documents/{document.pk}")
             else:
@@ -149,7 +151,7 @@ def set_document_type(
                         if color
                         else str(document)
                     )
-                    + f" [{document.pk}]"
+                    + f" [{document.pk}]",
                 )
             print(f"Suggest document type {selected}")
         else:
@@ -176,9 +178,9 @@ def set_tags(
 
     if replace:
         Document.tags.through.objects.filter(document=document).exclude(
-            Q(tag__is_inbox_tag=True)
+            Q(tag__is_inbox_tag=True),
         ).exclude(
-            Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO)
+            Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO),
         ).delete()
 
     current_tags = set(document.tags.all())
@@ -198,7 +200,7 @@ def set_tags(
             print(
                 termcolors.colorize(str(document), fg="green")
                 if color
-                else str(document)
+                else str(document),
             )
             print(f"{base_url}/documents/{document.pk}")
         else:
@@ -208,7 +210,7 @@ def set_tags(
                     if color
                     else str(document)
                 )
-                + f" [{document.pk}]"
+                + f" [{document.pk}]",
             )
         if relevant_tags:
             print("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
@@ -254,7 +256,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
             except OSError as e:
                 logger.error(
                     f"Failed to move {instance.source_path} to trash at "
-                    f"{new_file_path}: {e}. Skipping cleanup!"
+                    f"{new_file_path}: {e}. Skipping cleanup!",
                 )
                 return
 
@@ -270,16 +272,18 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
                 except OSError as e:
                     logger.warning(
                         f"While deleting document {str(instance)}, the file "
-                        f"{filename} could not be deleted: {e}"
+                        f"{filename} could not be deleted: {e}",
                     )
 
         delete_empty_directories(
-            os.path.dirname(instance.source_path), root=settings.ORIGINALS_DIR
+            os.path.dirname(instance.source_path),
+            root=settings.ORIGINALS_DIR,
        )
 
         if instance.has_archive_version:
             delete_empty_directories(
-                os.path.dirname(instance.archive_path), root=settings.ARCHIVE_DIR
+                os.path.dirname(instance.archive_path),
+                root=settings.ARCHIVE_DIR,
             )
 
 
@@ -297,7 +301,7 @@ def validate_move(instance, old_path, new_path):
         # Can't do anything if the new file already exists. Skip updating file.
         logger.warning(
             f"Document {str(instance)}: Cannot rename file "
-            f"since target path {new_path} already exists."
+            f"since target path {new_path} already exists.",
         )
         raise CannotMoveFilesException()
 
@@ -331,12 +335,11 @@ def update_filename_and_move_files(sender, instance, **kwargs):
             if instance.has_archive_version:
 
                 instance.archive_filename = generate_unique_filename(
-                    instance, archive_filename=True
+                    instance,
+                    archive_filename=True,
                 )
 
-                move_archive = (
-                    old_archive_filename != instance.archive_filename
-                )  # NOQA: E501
+                move_archive = old_archive_filename != instance.archive_filename
             else:
                 move_archive = False
 
@@ -374,7 +377,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
                 if move_archive and os.path.isfile(instance.archive_path):
                     os.rename(instance.archive_path, old_archive_path)
 
-            except Exception as e:
+            except Exception:
                 # This is fine, since:
                 # A: if we managed to move source from A to B, we will also
                 #  manage to move it from B to A. If not, we have a serious
@@ -393,14 +396,16 @@ def update_filename_and_move_files(sender, instance, **kwargs):
         # something has failed above.
         if not os.path.isfile(old_source_path):
             delete_empty_directories(
-                os.path.dirname(old_source_path), root=settings.ORIGINALS_DIR
+                os.path.dirname(old_source_path),
+                root=settings.ORIGINALS_DIR,
             )
 
         if instance.has_archive_version and not os.path.isfile(
-            old_archive_path
-        ):  # NOQA: E501
+            old_archive_path,
+        ):
             delete_empty_directories(
-                os.path.dirname(old_archive_path), root=settings.ARCHIVE_DIR
+                os.path.dirname(old_archive_path),
+                root=settings.ARCHIVE_DIR,
             )
 
 
@@ -3,13 +3,18 @@ import logging
 import tqdm
 from django.conf import settings
 from django.db.models.signals import post_save
-from whoosh.writing import AsyncWriter
-
-from documents import index, sanity_checker
-from documents.classifier import DocumentClassifier, load_classifier
-from documents.consumer import Consumer, ConsumerError
-from documents.models import Document, Tag, DocumentType, Correspondent
+from documents import index
+from documents import sanity_checker
+from documents.classifier import DocumentClassifier
+from documents.classifier import load_classifier
+from documents.consumer import Consumer
+from documents.consumer import ConsumerError
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
 from documents.sanity_checker import SanityCheckFailedException
+from whoosh.writing import AsyncWriter
 
 logger = logging.getLogger("paperless.tasks")
 
@@ -47,7 +52,7 @@ def train_classifier():
     try:
         if classifier.train():
             logger.info(
-                "Saving updated classifier model to {}...".format(settings.MODEL_FILE)
+                "Saving updated classifier model to {}...".format(settings.MODEL_FILE),
             )
             classifier.save()
         else:
@@ -82,7 +87,7 @@ def consume_file(
     else:
         raise ConsumerError(
             "Unknown error: Returned document was null, but "
-            "no error message was given."
+            "no error message was given.",
         )
 
 
@@ -1,7 +1,8 @@
 from factory import Faker
 from factory.django import DjangoModelFactory
 
-from ..models import Document, Correspondent
+from ..models import Correspondent
+from ..models import Document
 
 
 class CorrespondentFactory(DjangoModelFactory):
@@ -3,7 +3,6 @@ from unittest import mock
 from django.contrib.admin.sites import AdminSite
 from django.test import TestCase
 from django.utils import timezone
-
 from documents import index
 from documents.admin import DocumentAdmin
 from documents.models import Document
@@ -42,7 +41,8 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
         docs = []
         for i in range(42):
             doc = Document.objects.create(
-                title="Many documents with the same title", checksum=f"{i:02}"
+                title="Many documents with the same title",
+                checksum=f"{i:02}",
             )
             docs.append(doc)
             index.add_or_update_document(doc)
@@ -61,6 +61,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
 
     def test_created(self):
         doc = Document.objects.create(
-            title="test", created=timezone.make_aware(timezone.datetime(2020, 4, 12))
+            title="test",
+            created=timezone.make_aware(timezone.datetime(2020, 4, 12)),
         )
         self.assertEqual(self.doc_admin.created_(doc), "2020-04-12")
@@ -10,22 +10,20 @@ from unittest import mock
 import pytest
 from django.conf import settings
 from django.contrib.auth.models import User
-from django.utils import timezone
 from django.test import override_settings
+from django.utils import timezone
+from documents import bulk_edit
+from documents import index
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import MatchingModel
+from documents.models import SavedView
+from documents.models import Tag
+from documents.tests.utils import DirectoriesMixin
 from rest_framework.test import APITestCase
 from whoosh.writing import AsyncWriter
 
-from documents import index, bulk_edit
-from documents.models import (
-    Document,
-    Correspondent,
-    DocumentType,
-    Tag,
-    SavedView,
-    MatchingModel,
-)
-from documents.tests.utils import DirectoriesMixin
-
 
 class TestDocumentApi(DirectoriesMixin, APITestCase):
     def setUp(self):
@@ -72,7 +70,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         returned_doc["title"] = "the new title"
 
         response = self.client.put(
-            "/api/documents/{}/".format(doc.pk), returned_doc, format="json"
+            "/api/documents/{}/".format(doc.pk),
+            returned_doc,
+            format="json",
         )
 
         self.assertEqual(response.status_code, 200)
@@ -127,7 +127,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertEqual(len(results[0]), 2)
 
         response = self.client.get(
-            "/api/documents/?fields=id,conteasdnt", format="json"
+            "/api/documents/?fields=id,conteasdnt",
+            format="json",
         )
         self.assertEqual(response.status_code, 200)
         results = response.data["results"]
@@ -162,7 +163,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         )
 
         with open(
-            os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb"
+            os.path.join(self.dirs.thumbnail_dir, "{:07d}.png".format(doc.pk)),
+            "wb",
         ) as f:
             f.write(content_thumbnail)
 
@@ -206,7 +208,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertEqual(response.content, content_archive)
 
         response = self.client.get(
-            "/api/documents/{}/download/?original=true".format(doc.pk)
+            "/api/documents/{}/download/?original=true".format(doc.pk),
         )
 
         self.assertEqual(response.status_code, 200)
@@ -218,7 +220,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertEqual(response.content, content_archive)
 
         response = self.client.get(
-            "/api/documents/{}/preview/?original=true".format(doc.pk)
+            "/api/documents/{}/preview/?original=true".format(doc.pk),
         )
 
         self.assertEqual(response.status_code, 200)
@@ -227,7 +229,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     def test_document_actions_not_existing_file(self):
 
         doc = Document.objects.create(
-            title="none", filename=os.path.basename("asd"), mime_type="application/pdf"
+            title="none",
+            filename=os.path.basename("asd"),
+            mime_type="application/pdf",
         )
 
         response = self.client.get("/api/documents/{}/download/".format(doc.pk))
@@ -242,13 +246,19 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     def test_document_filters(self):
 
         doc1 = Document.objects.create(
-            title="none1", checksum="A", mime_type="application/pdf"
+            title="none1",
+            checksum="A",
+            mime_type="application/pdf",
         )
         doc2 = Document.objects.create(
-            title="none2", checksum="B", mime_type="application/pdf"
+            title="none2",
+            checksum="B",
+            mime_type="application/pdf",
         )
         doc3 = Document.objects.create(
-            title="none3", checksum="C", mime_type="application/pdf"
+            title="none3",
+            checksum="C",
+            mime_type="application/pdf",
         )
 
         tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True)
@@ -273,7 +283,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id])
 
         response = self.client.get(
-            "/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id)
+            "/api/documents/?tags__id__in={},{}".format(tag_inbox.id, tag_3.id),
         )
         self.assertEqual(response.status_code, 200)
         results = response.data["results"]
@@ -281,7 +291,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc3.id])
 
         response = self.client.get(
-            "/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id)
+            "/api/documents/?tags__id__in={},{}".format(tag_2.id, tag_3.id),
         )
         self.assertEqual(response.status_code, 200)
         results = response.data["results"]
@@ -289,7 +299,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc2.id, doc3.id])
 
         response = self.client.get(
-            "/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id)
+            "/api/documents/?tags__id__all={},{}".format(tag_2.id, tag_3.id),
        )
         self.assertEqual(response.status_code, 200)
         results = response.data["results"]
@@ -297,14 +307,14 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertEqual(results[0]["id"], doc3.id)
 
         response = self.client.get(
-            "/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id)
+            "/api/documents/?tags__id__all={},{}".format(tag_inbox.id, tag_3.id),
         )
         self.assertEqual(response.status_code, 200)
         results = response.data["results"]
         self.assertEqual(len(results), 0)
 
         response = self.client.get(
-            "/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id)
+            "/api/documents/?tags__id__all={}a{}".format(tag_inbox.id, tag_3.id),
         )
         self.assertEqual(response.status_code, 200)
         results = response.data["results"]
@@ -317,7 +327,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertCountEqual([results[0]["id"], results[1]["id"]], [doc1.id, doc2.id])
 
         response = self.client.get(
-            "/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id)
+            "/api/documents/?tags__id__none={},{}".format(tag_3.id, tag_2.id),
         )
         self.assertEqual(response.status_code, 200)
         results = response.data["results"]
@@ -325,7 +335,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertEqual(results[0]["id"], doc1.id)
 
         response = self.client.get(
-            "/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id)
+            "/api/documents/?tags__id__none={},{}".format(tag_2.id, tag_inbox.id),
         )
         self.assertEqual(response.status_code, 200)
         results = response.data["results"]
@@ -443,7 +453,7 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
 
         for i in range(1, 6):
             response = self.client.get(
-                f"/api/documents/?query=content&page={i}&page_size=10"
+                f"/api/documents/?query=content&page={i}&page_size=10",
             )
             results = response.data["results"]
             self.assertEqual(response.data["count"], 55)
@@ -595,31 +605,35 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertCountEqual(search_query("&correspondent__id=" + str(c.id)), [d1.id])
         self.assertCountEqual(search_query("&document_type__id=" + str(dt.id)), [d2.id])
         self.assertCountEqual(
-            search_query("&correspondent__isnull"), [d2.id, d3.id, d4.id, d5.id]
+            search_query("&correspondent__isnull"),
+            [d2.id, d3.id, d4.id, d5.id],
         )
         self.assertCountEqual(
-            search_query("&document_type__isnull"), [d1.id, d3.id, d4.id, d5.id]
+            search_query("&document_type__isnull"),
+            [d1.id, d3.id, d4.id, d5.id],
        )
         self.assertCountEqual(
-            search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)), [d3.id]
+            search_query("&tags__id__all=" + str(t.id) + "," + str(t2.id)),
+            [d3.id],
         )
         self.assertCountEqual(search_query("&tags__id__all=" + str(t.id)), [d3.id])
         self.assertCountEqual(
-            search_query("&tags__id__all=" + str(t2.id)), [d3.id, d4.id]
+            search_query("&tags__id__all=" + str(t2.id)),
+            [d3.id, d4.id],
        )
 
         self.assertIn(
             d4.id,
             search_query(
                 "&created__date__lt="
-                + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d")
+                + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
             ),
         )
         self.assertNotIn(
             d4.id,
             search_query(
                 "&created__date__gt="
-                + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d")
+                + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
             ),
         )
 
@@ -627,40 +641,44 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
             d4.id,
             search_query(
                 "&created__date__lt="
-                + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")
+                + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
             ),
         )
         self.assertIn(
             d4.id,
             search_query(
                 "&created__date__gt="
-                + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")
+                + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
             ),
         )
 
         self.assertIn(
             d5.id,
             search_query(
-                "&added__date__lt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d")
+                "&added__date__lt="
+                + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
             ),
         )
         self.assertNotIn(
             d5.id,
             search_query(
-                "&added__date__gt=" + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d")
+                "&added__date__gt="
+                + datetime.datetime(2020, 9, 2).strftime("%Y-%m-%d"),
            ),
         )
 
         self.assertNotIn(
             d5.id,
             search_query(
-                "&added__date__lt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")
+                "&added__date__lt="
+                + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
             ),
         )
         self.assertIn(
             d5.id,
             search_query(
-                "&added__date__gt=" + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d")
+                "&added__date__gt="
+                + datetime.datetime(2020, 1, 2).strftime("%Y-%m-%d"),
             ),
         )
 
@@ -700,18 +718,22 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
             return [hit["id"] for hit in r.data["results"]]
 
         self.assertListEqual(
-            search_query("&ordering=archive_serial_number"), [d3.id, d1.id, d2.id]
+            search_query("&ordering=archive_serial_number"),
+            [d3.id, d1.id, d2.id],
         )
         self.assertListEqual(
-            search_query("&ordering=-archive_serial_number"), [d2.id, d1.id, d3.id]
+            search_query("&ordering=-archive_serial_number"),
+            [d2.id, d1.id, d3.id],
         )
         self.assertListEqual(search_query("&ordering=title"), [d3.id, d2.id, d1.id])
         self.assertListEqual(search_query("&ordering=-title"), [d1.id, d2.id, d3.id])
         self.assertListEqual(
-            search_query("&ordering=correspondent__name"), [d1.id, d3.id, d2.id]
+            search_query("&ordering=correspondent__name"),
+            [d1.id, d3.id, d2.id],
        )
         self.assertListEqual(
-            search_query("&ordering=-correspondent__name"), [d2.id, d3.id, d1.id]
+            search_query("&ordering=-correspondent__name"),
+            [d2.id, d3.id, d1.id],
         )
 
     def test_statistics(self):
@@ -740,10 +762,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     def test_upload(self, m):
 
         with open(
-            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
+            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+            "rb",
         ) as f:
             response = self.client.post(
-                "/api/documents/post_document/", {"document": f}
+                "/api/documents/post_document/",
+                {"document": f},
             )
 
         self.assertEqual(response.status_code, 200)
@@ -761,7 +785,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     def test_upload_empty_metadata(self, m):
 
         with open(
-            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
+            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+            "rb",
         ) as f:
             response = self.client.post(
                 "/api/documents/post_document/",
@@ -783,10 +808,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     def test_upload_invalid_form(self, m):
 
         with open(
-            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
+            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+            "rb",
         ) as f:
             response = self.client.post(
-                "/api/documents/post_document/", {"documenst": f}
+                "/api/documents/post_document/",
+                {"documenst": f},
             )
         self.assertEqual(response.status_code, 400)
         m.assert_not_called()
@@ -795,10 +822,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     def test_upload_invalid_file(self, m):
 
         with open(
-            os.path.join(os.path.dirname(__file__), "samples", "simple.zip"), "rb"
+            os.path.join(os.path.dirname(__file__), "samples", "simple.zip"),
+            "rb",
         ) as f:
             response = self.client.post(
-                "/api/documents/post_document/", {"document": f}
+                "/api/documents/post_document/",
+                {"document": f},
             )
         self.assertEqual(response.status_code, 400)
         m.assert_not_called()
@@ -806,7 +835,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     @mock.patch("documents.views.async_task")
     def test_upload_with_title(self, async_task):
         with open(
-            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
+            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+            "rb",
         ) as f:
             response = self.client.post(
                 "/api/documents/post_document/",
@@ -824,10 +854,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     def test_upload_with_correspondent(self, async_task):
         c = Correspondent.objects.create(name="test-corres")
         with open(
-            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
+            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+            "rb",
         ) as f:
             response = self.client.post(
-                "/api/documents/post_document/", {"document": f, "correspondent": c.id}
+                "/api/documents/post_document/",
+                {"document": f, "correspondent": c.id},
             )
         self.assertEqual(response.status_code, 200)
 
@@ -840,10 +872,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     @mock.patch("documents.views.async_task")
     def test_upload_with_invalid_correspondent(self, async_task):
         with open(
-            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
+            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+            "rb",
         ) as f:
             response = self.client.post(
-                "/api/documents/post_document/", {"document": f, "correspondent": 3456}
+                "/api/documents/post_document/",
+                {"document": f, "correspondent": 3456},
             )
         self.assertEqual(response.status_code, 400)
 
@@ -853,10 +887,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     def test_upload_with_document_type(self, async_task):
         dt = DocumentType.objects.create(name="invoice")
         with open(
-            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
+            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+            "rb",
         ) as f:
             response = self.client.post(
-                "/api/documents/post_document/", {"document": f, "document_type": dt.id}
+                "/api/documents/post_document/",
+                {"document": f, "document_type": dt.id},
             )
         self.assertEqual(response.status_code, 200)
 
@@ -869,10 +905,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     @mock.patch("documents.views.async_task")
     def test_upload_with_invalid_document_type(self, async_task):
         with open(
-            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
+            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+            "rb",
         ) as f:
             response = self.client.post(
-                "/api/documents/post_document/", {"document": f, "document_type": 34578}
+                "/api/documents/post_document/",
+                {"document": f, "document_type": 34578},
             )
         self.assertEqual(response.status_code, 400)
 
@@ -883,10 +921,12 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         t1 = Tag.objects.create(name="tag1")
         t2 = Tag.objects.create(name="tag2")
         with open(
-            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
+            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+            "rb",
         ) as f:
             response = self.client.post(
-                "/api/documents/post_document/", {"document": f, "tags": [t2.id, t1.id]}
+                "/api/documents/post_document/",
+                {"document": f, "tags": [t2.id, t1.id]},
             )
         self.assertEqual(response.status_code, 200)
 
@@ -901,7 +941,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         t1 = Tag.objects.create(name="tag1")
         t2 = Tag.objects.create(name="tag2")
         with open(
-            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), "rb"
+            os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+            "rb",
         ) as f:
             response = self.client.post(
                 "/api/documents/post_document/",
@@ -952,7 +993,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
 
     def test_get_metadata_no_archive(self):
         doc = Document.objects.create(
-            title="test", filename="file.pdf", mime_type="application/pdf"
+            title="test",
+            filename="file.pdf",
+            mime_type="application/pdf",
         )
 
         shutil.copy(
@@ -999,7 +1042,8 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
 
         self.assertEqual(response.status_code, 200)
         self.assertEqual(
-            response.data, {"correspondents": [], "tags": [], "document_types": []}
+            response.data,
+            {"correspondents": [], "tags": [], "document_types": []},
         )
 
     def test_get_suggestions_invalid_doc(self):
@@ -1010,10 +1054,15 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     @mock.patch("documents.views.match_tags")
     @mock.patch("documents.views.match_document_types")
     def test_get_suggestions(
-        self, match_document_types, match_tags, match_correspondents
+        self,
+        match_document_types,
+        match_tags,
+        match_correspondents,
     ):
         doc = Document.objects.create(
-            title="test", mime_type="application/pdf", content="this is an invoice!"
+            title="test",
+            mime_type="application/pdf",
+            content="this is an invoice!",
         )
         match_tags.return_value = [Tag(id=56), Tag(id=123)]
         match_document_types.return_value = [DocumentType(id=23)]
@@ -1094,7 +1143,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
         self.assertEqual(v1.user, self.user)
 
         response = self.client.patch(
-            f"/api/saved_views/{v1.id}/", {"show_in_sidebar": False}, format="json"
+            f"/api/saved_views/{v1.id}/",
+            {"show_in_sidebar": False},
+            format="json",
         )
 
         v1 = SavedView.objects.get(id=v1.id)
@@ -1183,7 +1234,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
     def test_regex_no_algorithm(self):
         for endpoint in ["correspondents", "tags", "document_types"]:
             response = self.client.post(
-                f"/api/{endpoint}/", {"name": "test", "match": "[0-9]"}, format="json"
+                f"/api/{endpoint}/",
+                {"name": "test", "match": "[0-9]"},
+                format="json",
             )
             self.assertEqual(response.status_code, 201, endpoint)
 
@@ -1200,7 +1253,9 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
 
     def test_tag_color(self):
         response = self.client.post(
-            "/api/tags/", {"name": "tag", "colour": 3}, format="json"
+            "/api/tags/",
+            {"name": "tag", "colour": 3},
+            format="json",
         )
         self.assertEqual(response.status_code, 201)
         self.assertEqual(Tag.objects.get(id=response.data["id"]).color, "#b2df8a")
@@ -1213,14 +1268,17 @@ class TestDocumentApi(DirectoriesMixin, APITestCase):
 
     def test_tag_color_invalid(self):
         response = self.client.post(
-            "/api/tags/", {"name": "tag", "colour": 34}, format="json"
+            "/api/tags/",
+            {"name": "tag", "colour": 34},
+            format="json",
         )
         self.assertEqual(response.status_code, 400)
 
     def test_tag_color_custom(self):
         tag = Tag.objects.create(name="test", color="#abcdef")
         self.assertEqual(
-            self.client.get(f"/api/tags/{tag.id}/", format="json").data["colour"], 1
+            self.client.get(f"/api/tags/{tag.id}/", format="json").data["colour"],
+            1,
         )
 
 
@@ -1236,32 +1294,42 @@ class TestDocumentApiV2(DirectoriesMixin, APITestCase):
     def test_tag_validate_color(self):
         self.assertEqual(
             self.client.post(
-                "/api/tags/", {"name": "test", "color": "#12fFaA"}, format="json"
+                "/api/tags/",
+                {"name": "test", "color": "#12fFaA"},
+                format="json",
             ).status_code,
             201,
         )
 
         self.assertEqual(
             self.client.post(
-                "/api/tags/", {"name": "test1", "color": "abcdef"}, format="json"
+                "/api/tags/",
+                {"name": "test1", "color": "abcdef"},
+                format="json",
             ).status_code,
             400,
         )
         self.assertEqual(
             self.client.post(
-                "/api/tags/", {"name": "test2", "color": "#abcdfg"}, format="json"
+                "/api/tags/",
+                {"name": "test2", "color": "#abcdfg"},
+                format="json",
             ).status_code,
             400,
         )
         self.assertEqual(
             self.client.post(
-                "/api/tags/", {"name": "test3", "color": "#asd"}, format="json"
+                "/api/tags/",
+                {"name": "test3", "color": "#asd"},
+                format="json",
             ).status_code,
             400,
         )
         self.assertEqual(
             self.client.post(
-                "/api/tags/", {"name": "test4", "color": "#12121212"}, format="json"
+                "/api/tags/",
+                {"name": "test4", "color": "#12121212"},
+                format="json",
             ).status_code,
             400,
         )
@@ -1313,10 +1381,16 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
         self.t2 = Tag.objects.create(name="t2")
         self.doc1 = Document.objects.create(checksum="A", title="A")
         self.doc2 = Document.objects.create(
-            checksum="B", title="B", correspondent=self.c1, document_type=self.dt1
+            checksum="B",
+            title="B",
+            correspondent=self.c1,
+            document_type=self.dt1,
         )
         self.doc3 = Document.objects.create(
-            checksum="C", title="C", correspondent=self.c2, document_type=self.dt2
+            checksum="C",
+            title="C",
+            correspondent=self.c2,
+            document_type=self.dt2,
         )
         self.doc4 = Document.objects.create(checksum="D", title="D")
         self.doc5 = Document.objects.create(checksum="E", title="E")
@@ -1327,7 +1401,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
     def test_set_correspondent(self):
         self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 1)
         bulk_edit.set_correspondent(
-            [self.doc1.id, self.doc2.id, self.doc3.id], self.c2.id
+            [self.doc1.id, self.doc2.id, self.doc3.id],
+            self.c2.id,
         )
         self.assertEqual(Document.objects.filter(correspondent=self.c2).count(), 3)
         self.async_task.assert_called_once()
@@ -1345,7 +1420,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
     def test_set_document_type(self):
         self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 1)
         bulk_edit.set_document_type(
-            [self.doc1.id, self.doc2.id, self.doc3.id], self.dt2.id
+            [self.doc1.id, self.doc2.id, self.doc3.id],
+            self.dt2.id,
         )
         self.assertEqual(Document.objects.filter(document_type=self.dt2).count(), 3)
         self.async_task.assert_called_once()
@@ -1363,7 +1439,8 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
     def test_add_tag(self):
         self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 2)
         bulk_edit.add_tag(
-            [self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id], self.t1.id
+            [self.doc1.id, self.doc2.id, self.doc3.id, self.doc4.id],
+            self.t1.id,
         )
         self.assertEqual(Document.objects.filter(tags__id=self.t1.id).count(), 4)
         self.async_task.assert_called_once()
@@ -1415,7 +1492,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc1.id],
                     "method": "set_correspondent",
                     "parameters": {"correspondent": self.c1.id},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1435,7 +1512,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc1.id],
                     "method": "set_correspondent",
                     "parameters": {"correspondent": None},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1455,7 +1532,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc1.id],
                     "method": "set_document_type",
                     "parameters": {"document_type": self.dt1.id},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1475,7 +1552,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc1.id],
                     "method": "set_document_type",
                     "parameters": {"document_type": None},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1495,7 +1572,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc1.id],
                     "method": "add_tag",
                     "parameters": {"tag": self.t1.id},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1515,7 +1592,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc1.id],
                     "method": "remove_tag",
                     "parameters": {"tag": self.t1.id},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1538,7 +1615,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                         "add_tags": [self.t1.id],
                         "remove_tags": [self.t2.id],
                     },
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1555,7 +1632,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
         response = self.client.post(
             "/api/documents/bulk_edit/",
             json.dumps(
-                {"documents": [self.doc1.id], "method": "delete", "parameters": {}}
+                {"documents": [self.doc1.id], "method": "delete", "parameters": {}},
             ),
             content_type="application/json",
         )
@@ -1580,7 +1657,11 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
         response = self.client.post(
             "/api/documents/bulk_edit/",
             json.dumps(
-                {"documents": [self.doc2.id], "method": "exterminate", "parameters": {}}
+                {
+                    "documents": [self.doc2.id],
+                    "method": "exterminate",
+                    "parameters": {},
+                },
             ),
             content_type="application/json",
         )
@@ -1596,7 +1677,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc2.id],
                     "method": "set_correspondent",
                     "parameters": {"correspondent": 345657},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1613,7 +1694,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc2.id],
                     "method": "set_correspondent",
                     "parameters": {},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1628,7 +1709,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc2.id],
                     "method": "set_document_type",
                     "parameters": {"document_type": 345657},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1645,7 +1726,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc2.id],
                     "method": "set_document_type",
                     "parameters": {},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1660,7 +1741,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc2.id],
                     "method": "add_tag",
                     "parameters": {"tag": 345657},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1672,7 +1753,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
         response = self.client.post(
             "/api/documents/bulk_edit/",
             json.dumps(
-                {"documents": [self.doc2.id], "method": "add_tag", "parameters": {}}
+                {"documents": [self.doc2.id], "method": "add_tag", "parameters": {}},
             ),
             content_type="application/json",
         )
@@ -1687,7 +1768,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc2.id],
                     "method": "remove_tag",
                     "parameters": {"tag": 345657},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1699,7 +1780,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
         response = self.client.post(
             "/api/documents/bulk_edit/",
             json.dumps(
-                {"documents": [self.doc2.id], "method": "remove_tag", "parameters": {}}
+                {"documents": [self.doc2.id], "method": "remove_tag", "parameters": {}},
             ),
             content_type="application/json",
         )
@@ -1717,7 +1798,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                         "add_tags": [self.t2.id, 1657],
                         "remove_tags": [1123123],
                     },
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1731,7 +1812,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc2.id],
                     "method": "modify_tags",
                     "parameters": {"remove_tags": [1123123]},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1744,7 +1825,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
                     "documents": [self.doc2.id],
                     "method": "modify_tags",
                     "parameters": {"add_tags": [self.t2.id, 1657]},
-                }
+                },
             ),
             content_type="application/json",
         )
@@ -1774,7 +1855,7 @@ class TestBulkEdit(DirectoriesMixin, APITestCase):
         response = self.client.post(
             "/api/documents/selection_data/",
             json.dumps(
-                {"documents": [self.doc1.id, self.doc2.id, self.doc4.id, self.doc5.id]}
+                {"documents": [self.doc1.id, self.doc2.id, self.doc4.id, self.doc5.id]},
             ),
             content_type="application/json",
         )
@@ -1856,7 +1937,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
         response = self.client.post(
             "/api/documents/bulk_download/",
             json.dumps(
-                {"documents": [self.doc2.id, self.doc3.id], "content": "originals"}
+                {"documents": [self.doc2.id, self.doc3.id], "content": "originals"},
            ),
             content_type="application/json",
         )
@@ -1914,17 +1995,20 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
 
             with self.doc2.source_file as f:
                 self.assertEqual(
-                    f.read(), zipf.read("originals/2021-01-01 document A.pdf")
+                    f.read(),
+                    zipf.read("originals/2021-01-01 document A.pdf"),
                 )
 
             with self.doc3.archive_file as f:
                 self.assertEqual(
-                    f.read(), zipf.read("archive/2020-03-21 document B.pdf")
+                    f.read(),
+                    zipf.read("archive/2020-03-21 document B.pdf"),
                 )
 
             with self.doc3.source_file as f:
                 self.assertEqual(
-                    f.read(), zipf.read("originals/2020-03-21 document B.jpg")
+                    f.read(),
+                    zipf.read("originals/2020-03-21 document B.jpg"),
                 )
 
     def test_filename_clashes(self):
@@ -1953,7 +2037,7 @@ class TestBulkDownload(DirectoriesMixin, APITestCase):
         response = self.client.post(
             "/api/documents/bulk_download/",
             json.dumps(
-                {"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"}
+                {"documents": [self.doc2.id, self.doc2b.id], "compression": "lzma"},
             ),
             content_type="application/json",
         )
@@ -1968,13 +2052,16 @@ class TestApiAuth(APITestCase):
 
         self.assertEqual(self.client.get(f"/api/documents/{d.id}/").status_code, 401)
         self.assertEqual(
-            self.client.get(f"/api/documents/{d.id}/download/").status_code, 401
+            self.client.get(f"/api/documents/{d.id}/download/").status_code,
+            401,
         )
         self.assertEqual(
-            self.client.get(f"/api/documents/{d.id}/preview/").status_code, 401
+            self.client.get(f"/api/documents/{d.id}/preview/").status_code,
+            401,
         )
         self.assertEqual(
-            self.client.get(f"/api/documents/{d.id}/thumb/").status_code, 401
+            self.client.get(f"/api/documents/{d.id}/thumb/").status_code,
+            401,
         )
 
         self.assertEqual(self.client.get("/api/tags/").status_code, 401)
@@ -1987,10 +2074,12 @@ class TestApiAuth(APITestCase):
         self.assertEqual(self.client.get("/api/search/autocomplete/").status_code, 401)
         self.assertEqual(self.client.get("/api/documents/bulk_edit/").status_code, 401)
         self.assertEqual(
-            self.client.get("/api/documents/bulk_download/").status_code, 401
+            self.client.get("/api/documents/bulk_download/").status_code,
+            401,
         )
         self.assertEqual(
-            self.client.get("/api/documents/selection_data/").status_code, 401
+            self.client.get("/api/documents/selection_data/").status_code,
+            401,
         )
 
     def test_api_version_no_auth(self):
@@ -4,10 +4,11 @@ from unittest import mock
 from django.core.checks import Error
 from django.test import TestCase
 
-from .factories import DocumentFactory
-from .. import document_consumer_declaration
-from ..checks import changed_password_check, parser_check
+from ..checks import changed_password_check
+from ..checks import parser_check
 from ..models import Document
+from ..signals import document_consumer_declaration
+from .factories import DocumentFactory
 
 
 class ChecksTestCase(TestCase):
@@ -30,7 +31,7 @@ class ChecksTestCase(TestCase):
                 [
                     Error(
                         "No parsers found. This is a bug. The consumer won't be "
-                        "able to consume any documents without parsers."
-                    )
+                        "able to consume any documents without parsers.",
+                    ),
                 ],
             )
@@ -5,14 +5,15 @@ from unittest import mock
 
 import pytest
 from django.conf import settings
-from django.test import TestCase, override_settings
-
-from documents.classifier import (
-    DocumentClassifier,
-    IncompatibleClassifierVersionError,
-    load_classifier,
-)
-from documents.models import Correspondent, Document, Tag, DocumentType
+from django.test import override_settings
+from django.test import TestCase
+from documents.classifier import DocumentClassifier
+from documents.classifier import IncompatibleClassifierVersionError
+from documents.classifier import load_classifier
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
 from documents.tests.utils import DirectoriesMixin
 
 
@ -23,26 +24,37 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def generate_test_data(self):
 | 
					    def generate_test_data(self):
 | 
				
			||||||
        self.c1 = Correspondent.objects.create(
 | 
					        self.c1 = Correspondent.objects.create(
 | 
				
			||||||
            name="c1", matching_algorithm=Correspondent.MATCH_AUTO
 | 
					            name="c1",
 | 
				
			||||||
 | 
					            matching_algorithm=Correspondent.MATCH_AUTO,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.c2 = Correspondent.objects.create(name="c2")
 | 
					        self.c2 = Correspondent.objects.create(name="c2")
 | 
				
			||||||
        self.c3 = Correspondent.objects.create(
 | 
					        self.c3 = Correspondent.objects.create(
 | 
				
			||||||
            name="c3", matching_algorithm=Correspondent.MATCH_AUTO
 | 
					            name="c3",
 | 
				
			||||||
 | 
					            matching_algorithm=Correspondent.MATCH_AUTO,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.t1 = Tag.objects.create(
 | 
					        self.t1 = Tag.objects.create(
 | 
				
			||||||
            name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12
 | 
					            name="t1",
 | 
				
			||||||
 | 
					            matching_algorithm=Tag.MATCH_AUTO,
 | 
				
			||||||
 | 
					            pk=12,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.t2 = Tag.objects.create(
 | 
					        self.t2 = Tag.objects.create(
 | 
				
			||||||
            name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True
 | 
					            name="t2",
 | 
				
			||||||
 | 
					            matching_algorithm=Tag.MATCH_ANY,
 | 
				
			||||||
 | 
					            pk=34,
 | 
				
			||||||
 | 
					            is_inbox_tag=True,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.t3 = Tag.objects.create(
 | 
					        self.t3 = Tag.objects.create(
 | 
				
			||||||
            name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45
 | 
					            name="t3",
 | 
				
			||||||
 | 
					            matching_algorithm=Tag.MATCH_AUTO,
 | 
				
			||||||
 | 
					            pk=45,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.dt = DocumentType.objects.create(
 | 
					        self.dt = DocumentType.objects.create(
 | 
				
			||||||
            name="dt", matching_algorithm=DocumentType.MATCH_AUTO
 | 
					            name="dt",
 | 
				
			||||||
 | 
					            matching_algorithm=DocumentType.MATCH_AUTO,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.dt2 = DocumentType.objects.create(
 | 
					        self.dt2 = DocumentType.objects.create(
 | 
				
			||||||
            name="dt2", matching_algorithm=DocumentType.MATCH_AUTO
 | 
					            name="dt2",
 | 
				
			||||||
 | 
					            matching_algorithm=DocumentType.MATCH_AUTO,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.doc1 = Document.objects.create(
 | 
					        self.doc1 = Document.objects.create(
 | 
				
			||||||
@ -59,7 +71,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
            checksum="B",
 | 
					            checksum="B",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.doc_inbox = Document.objects.create(
 | 
					        self.doc_inbox = Document.objects.create(
 | 
				
			||||||
            title="doc235", content="aa", checksum="C"
 | 
					            title="doc235",
 | 
				
			||||||
 | 
					            content="aa",
 | 
				
			||||||
 | 
					            checksum="C",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.doc1.tags.add(self.t1)
 | 
					        self.doc1.tags.add(self.t1)
 | 
				
			||||||
@ -90,27 +104,33 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
        self.generate_test_data()
 | 
					        self.generate_test_data()
 | 
				
			||||||
        self.classifier.train()
 | 
					        self.classifier.train()
 | 
				
			||||||
        self.assertListEqual(
 | 
					        self.assertListEqual(
 | 
				
			||||||
            list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk]
 | 
					            list(self.classifier.correspondent_classifier.classes_),
 | 
				
			||||||
 | 
					            [-1, self.c1.pk],
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.assertListEqual(
 | 
					        self.assertListEqual(
 | 
				
			||||||
            list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk]
 | 
					            list(self.classifier.tags_binarizer.classes_),
 | 
				
			||||||
 | 
					            [self.t1.pk, self.t3.pk],
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def testPredict(self):
 | 
					    def testPredict(self):
 | 
				
			||||||
        self.generate_test_data()
 | 
					        self.generate_test_data()
 | 
				
			||||||
        self.classifier.train()
 | 
					        self.classifier.train()
 | 
				
			||||||
        self.assertEqual(
 | 
					        self.assertEqual(
 | 
				
			||||||
            self.classifier.predict_correspondent(self.doc1.content), self.c1.pk
 | 
					            self.classifier.predict_correspondent(self.doc1.content),
 | 
				
			||||||
 | 
					            self.c1.pk,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
 | 
					        self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
 | 
				
			||||||
        self.assertListEqual(
 | 
					        self.assertListEqual(
 | 
				
			||||||
            self.classifier.predict_tags(self.doc1.content), [self.t1.pk]
 | 
					            self.classifier.predict_tags(self.doc1.content),
 | 
				
			||||||
 | 
					            [self.t1.pk],
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.assertListEqual(
 | 
					        self.assertListEqual(
 | 
				
			||||||
            self.classifier.predict_tags(self.doc2.content), [self.t1.pk, self.t3.pk]
 | 
					            self.classifier.predict_tags(self.doc2.content),
 | 
				
			||||||
 | 
					            [self.t1.pk, self.t3.pk],
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.assertEqual(
 | 
					        self.assertEqual(
 | 
				
			||||||
            self.classifier.predict_document_type(self.doc1.content), self.dt.pk
 | 
					            self.classifier.predict_document_type(self.doc1.content),
 | 
				
			||||||
 | 
					            self.dt.pk,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)
 | 
					        self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -133,7 +153,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        current_ver = DocumentClassifier.FORMAT_VERSION
 | 
					        current_ver = DocumentClassifier.FORMAT_VERSION
 | 
				
			||||||
        with mock.patch(
 | 
					        with mock.patch(
 | 
				
			||||||
            "documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver + 1
 | 
					            "documents.classifier.DocumentClassifier.FORMAT_VERSION",
 | 
				
			||||||
 | 
					            current_ver + 1,
 | 
				
			||||||
        ):
 | 
					        ):
 | 
				
			||||||
            # assure that we won't load old classifiers.
 | 
					            # assure that we won't load old classifiers.
 | 
				
			||||||
            self.assertRaises(IncompatibleClassifierVersionError, classifier2.load)
 | 
					            self.assertRaises(IncompatibleClassifierVersionError, classifier2.load)
 | 
				
			||||||
@ -157,7 +178,7 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
        self.assertFalse(new_classifier.train())
 | 
					        self.assertFalse(new_classifier.train())
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @override_settings(
 | 
					    @override_settings(
 | 
				
			||||||
        MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")
 | 
					        MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    def test_load_and_classify(self):
 | 
					    def test_load_and_classify(self):
 | 
				
			||||||
        self.generate_test_data()
 | 
					        self.generate_test_data()
 | 
				
			||||||
@ -169,7 +190,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def test_one_correspondent_predict(self):
 | 
					    def test_one_correspondent_predict(self):
 | 
				
			||||||
        c1 = Correspondent.objects.create(
 | 
					        c1 = Correspondent.objects.create(
 | 
				
			||||||
            name="c1", matching_algorithm=Correspondent.MATCH_AUTO
 | 
					            name="c1",
 | 
				
			||||||
 | 
					            matching_algorithm=Correspondent.MATCH_AUTO,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        doc1 = Document.objects.create(
 | 
					        doc1 = Document.objects.create(
 | 
				
			||||||
            title="doc1",
 | 
					            title="doc1",
 | 
				
			||||||
@ -183,7 +205,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def test_one_correspondent_predict_manydocs(self):
 | 
					    def test_one_correspondent_predict_manydocs(self):
 | 
				
			||||||
        c1 = Correspondent.objects.create(
 | 
					        c1 = Correspondent.objects.create(
 | 
				
			||||||
            name="c1", matching_algorithm=Correspondent.MATCH_AUTO
 | 
					            name="c1",
 | 
				
			||||||
 | 
					            matching_algorithm=Correspondent.MATCH_AUTO,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        doc1 = Document.objects.create(
 | 
					        doc1 = Document.objects.create(
 | 
				
			||||||
            title="doc1",
 | 
					            title="doc1",
 | 
				
			||||||
@ -192,7 +215,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
            checksum="A",
 | 
					            checksum="A",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        doc2 = Document.objects.create(
 | 
					        doc2 = Document.objects.create(
 | 
				
			||||||
            title="doc2", content="this is a document from noone", checksum="B"
 | 
					            title="doc2",
 | 
				
			||||||
 | 
					            content="this is a document from noone",
 | 
				
			||||||
 | 
					            checksum="B",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.classifier.train()
 | 
					        self.classifier.train()
 | 
				
			||||||
@ -201,7 +226,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def test_one_type_predict(self):
 | 
					    def test_one_type_predict(self):
 | 
				
			||||||
        dt = DocumentType.objects.create(
 | 
					        dt = DocumentType.objects.create(
 | 
				
			||||||
            name="dt", matching_algorithm=DocumentType.MATCH_AUTO
 | 
					            name="dt",
 | 
				
			||||||
 | 
					            matching_algorithm=DocumentType.MATCH_AUTO,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc1 = Document.objects.create(
 | 
					        doc1 = Document.objects.create(
 | 
				
			||||||
@ -216,7 +242,8 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def test_one_type_predict_manydocs(self):
 | 
					    def test_one_type_predict_manydocs(self):
 | 
				
			||||||
        dt = DocumentType.objects.create(
 | 
					        dt = DocumentType.objects.create(
 | 
				
			||||||
            name="dt", matching_algorithm=DocumentType.MATCH_AUTO
 | 
					            name="dt",
 | 
				
			||||||
 | 
					            matching_algorithm=DocumentType.MATCH_AUTO,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc1 = Document.objects.create(
 | 
					        doc1 = Document.objects.create(
 | 
				
			||||||
@ -227,7 +254,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc2 = Document.objects.create(
 | 
					        doc2 = Document.objects.create(
 | 
				
			||||||
            title="doc1", content="this is a document from c2", checksum="B"
 | 
					            title="doc1",
 | 
				
			||||||
 | 
					            content="this is a document from c2",
 | 
				
			||||||
 | 
					            checksum="B",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.classifier.train()
 | 
					        self.classifier.train()
 | 
				
			||||||
@ -238,7 +267,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
        t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
 | 
					        t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc1 = Document.objects.create(
 | 
					        doc1 = Document.objects.create(
 | 
				
			||||||
            title="doc1", content="this is a document from c1", checksum="A"
 | 
					            title="doc1",
 | 
				
			||||||
 | 
					            content="this is a document from c1",
 | 
				
			||||||
 | 
					            checksum="A",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc1.tags.add(t1)
 | 
					        doc1.tags.add(t1)
 | 
				
			||||||
@ -249,7 +280,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
        t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
 | 
					        t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc1 = Document.objects.create(
 | 
					        doc1 = Document.objects.create(
 | 
				
			||||||
            title="doc1", content="this is a document from c1", checksum="A"
 | 
					            title="doc1",
 | 
				
			||||||
 | 
					            content="this is a document from c1",
 | 
				
			||||||
 | 
					            checksum="A",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.classifier.train()
 | 
					        self.classifier.train()
 | 
				
			||||||
@ -260,7 +293,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
        t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
 | 
					        t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc4 = Document.objects.create(
 | 
					        doc4 = Document.objects.create(
 | 
				
			||||||
            title="doc1", content="this is a document from c4", checksum="D"
 | 
					            title="doc1",
 | 
				
			||||||
 | 
					            content="this is a document from c4",
 | 
				
			||||||
 | 
					            checksum="D",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc4.tags.add(t1)
 | 
					        doc4.tags.add(t1)
 | 
				
			||||||
@ -273,16 +308,24 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
        t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
 | 
					        t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc1 = Document.objects.create(
 | 
					        doc1 = Document.objects.create(
 | 
				
			||||||
            title="doc1", content="this is a document from c1", checksum="A"
 | 
					            title="doc1",
 | 
				
			||||||
 | 
					            content="this is a document from c1",
 | 
				
			||||||
 | 
					            checksum="A",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        doc2 = Document.objects.create(
 | 
					        doc2 = Document.objects.create(
 | 
				
			||||||
            title="doc1", content="this is a document from c2", checksum="B"
 | 
					            title="doc1",
 | 
				
			||||||
 | 
					            content="this is a document from c2",
 | 
				
			||||||
 | 
					            checksum="B",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        doc3 = Document.objects.create(
 | 
					        doc3 = Document.objects.create(
 | 
				
			||||||
            title="doc1", content="this is a document from c3", checksum="C"
 | 
					            title="doc1",
 | 
				
			||||||
 | 
					            content="this is a document from c3",
 | 
				
			||||||
 | 
					            checksum="C",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        doc4 = Document.objects.create(
 | 
					        doc4 = Document.objects.create(
 | 
				
			||||||
            title="doc1", content="this is a document from c4", checksum="D"
 | 
					            title="doc1",
 | 
				
			||||||
 | 
					            content="this is a document from c4",
 | 
				
			||||||
 | 
					            checksum="D",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc1.tags.add(t1)
 | 
					        doc1.tags.add(t1)
 | 
				
			||||||
@ -300,10 +343,14 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
        t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
 | 
					        t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc1 = Document.objects.create(
 | 
					        doc1 = Document.objects.create(
 | 
				
			||||||
            title="doc1", content="this is a document from c1", checksum="A"
 | 
					            title="doc1",
 | 
				
			||||||
 | 
					            content="this is a document from c1",
 | 
				
			||||||
 | 
					            checksum="A",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        doc2 = Document.objects.create(
 | 
					        doc2 = Document.objects.create(
 | 
				
			||||||
            title="doc2", content="this is a document from c2", checksum="B"
 | 
					            title="doc2",
 | 
				
			||||||
 | 
					            content="this is a document from c2",
 | 
				
			||||||
 | 
					            checksum="B",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc1.tags.add(t1)
 | 
					        doc1.tags.add(t1)
 | 
				
			||||||
@ -316,10 +363,14 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
        t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
 | 
					        t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc1 = Document.objects.create(
 | 
					        doc1 = Document.objects.create(
 | 
				
			||||||
            title="doc1", content="this is a document from c1", checksum="A"
 | 
					            title="doc1",
 | 
				
			||||||
 | 
					            content="this is a document from c1",
 | 
				
			||||||
 | 
					            checksum="A",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        doc2 = Document.objects.create(
 | 
					        doc2 = Document.objects.create(
 | 
				
			||||||
            title="doc2", content="this is a document from c2", checksum="B"
 | 
					            title="doc2",
 | 
				
			||||||
 | 
					            content="this is a document from c2",
 | 
				
			||||||
 | 
					            checksum="B",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc1.tags.add(t1)
 | 
					        doc1.tags.add(t1)
 | 
				
			||||||
@ -338,13 +389,15 @@ class TestClassifier(DirectoriesMixin, TestCase):
 | 
				
			|||||||
        load.assert_called_once()
 | 
					        load.assert_called_once()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @override_settings(
 | 
					    @override_settings(
 | 
				
			||||||
        CACHES={"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}}
 | 
					        CACHES={
 | 
				
			||||||
 | 
					            "default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"},
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    @override_settings(
 | 
					    @override_settings(
 | 
				
			||||||
        MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")
 | 
					        MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"),
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    @pytest.mark.skip(
 | 
					    @pytest.mark.skip(
 | 
				
			||||||
        reason="Disabled caching due to high memory usage - need to investigate."
 | 
					        reason="Disabled caching due to high memory usage - need to investigate.",
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    def test_load_classifier_cached(self):
 | 
					    def test_load_classifier_cached(self):
 | 
				
			||||||
        classifier = load_classifier()
 | 
					        classifier = load_classifier()
 | 
				
			||||||
@ -6,13 +6,20 @@ from unittest import mock
 from unittest.mock import MagicMock

 from django.conf import settings
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase

-from .utils import DirectoriesMixin
-from ..consumer import Consumer, ConsumerError
-from ..models import FileInfo, Tag, Correspondent, DocumentType, Document
-from ..parsers import DocumentParser, ParseError
+from ..consumer import Consumer
+from ..consumer import ConsumerError
+from ..models import Correspondent
+from ..models import Document
+from ..models import DocumentType
+from ..models import FileInfo
+from ..models import Tag
+from ..parsers import DocumentParser
+from ..parsers import ParseError
 from ..tasks import sanity_check
+from .utils import DirectoriesMixin


 class TestAttributes(TestCase):
@ -33,12 +40,18 @@ class TestAttributes(TestCase):

     def test_guess_attributes_from_name_when_title_starts_with_dash(self):
         self._test_guess_attributes_from_name(
-            "- weird but should not break.pdf", None, "- weird but should not break", ()
+            "- weird but should not break.pdf",
+            None,
+            "- weird but should not break",
+            (),
         )

     def test_guess_attributes_from_name_when_title_ends_with_dash(self):
         self._test_guess_attributes_from_name(
-            "weird but should not break -.pdf", None, "weird but should not break -", ()
+            "weird but should not break -.pdf",
+            None,
+            "weird but should not break -",
+            (),
         )


@ -53,7 +66,12 @@ class TestFieldPermutations(TestCase):
     valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]

     def _test_guessed_attributes(
-        self, filename, created=None, correspondent=None, title=None, tags=None
+        self,
+        filename,
+        created=None,
+        correspondent=None,
+        title=None,
+        tags=None,
     ):

         info = FileInfo.from_filename(filename)
@ -131,7 +149,7 @@ class TestFieldPermutations(TestCase):
             FILENAME_PARSE_TRANSFORMS=[
                 (all_patt, "all.gif"),
                 (all_patt, "anotherall.gif"),
-            ]
+            ],
         ):
             info = FileInfo.from_filename(filename)
             self.assertEqual(info.title, "all")
@ -141,7 +159,7 @@ class TestFieldPermutations(TestCase):
             FILENAME_PARSE_TRANSFORMS=[
                 (none_patt, "none.gif"),
                 (all_patt, "anotherall.gif"),
-            ]
+            ],
         ):
             info = FileInfo.from_filename(filename)
             self.assertEqual(info.title, "anotherall")
@ -238,7 +256,9 @@ class TestConsumer(DirectoriesMixin, TestCase):

     def make_dummy_parser(self, logging_group, progress_callback=None):
         return DummyParser(
-            logging_group, self.dirs.scratch_dir, self.get_test_archive_file()
+            logging_group,
+            self.dirs.scratch_dir,
+            self.get_test_archive_file(),
         )

     def make_faulty_parser(self, logging_group, progress_callback=None):
@ -257,7 +277,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
                     "mime_types": {"application/pdf": ".pdf"},
                     "weight": 0,
                 },
-            )
+            ),
         ]
         self.addCleanup(patcher.stop)

@ -282,7 +302,11 @@ class TestConsumer(DirectoriesMixin, TestCase):

     def get_test_archive_file(self):
         src = os.path.join(
-            os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"
+            os.path.dirname(__file__),
+            "samples",
+            "documents",
+            "archive",
+            "0000001.pdf",
         )
         dst = os.path.join(self.dirs.scratch_dir, "sample_archive.pdf")
         shutil.copy(src, dst)
@ -296,7 +320,8 @@ class TestConsumer(DirectoriesMixin, TestCase):

         self.assertEqual(document.content, "The Text")
         self.assertEqual(
-            document.title, os.path.splitext(os.path.basename(filename))[0]
+            document.title,
+            os.path.splitext(os.path.basename(filename))[0],
         )
         self.assertIsNone(document.correspondent)
         self.assertIsNone(document.document_type)
@ -339,7 +364,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
         override_filename = "Statement for November.pdf"

         document = self.consumer.try_consume_file(
-            filename, override_filename=override_filename
+            filename,
+            override_filename=override_filename,
         )

         self.assertEqual(document.title, "Statement for November")
@ -348,7 +374,8 @@ class TestConsumer(DirectoriesMixin, TestCase):

     def testOverrideTitle(self):
         document = self.consumer.try_consume_file(
-            self.get_test_file(), override_title="Override Title"
+            self.get_test_file(),
+            override_title="Override Title",
         )
         self.assertEqual(document.title, "Override Title")
         self._assert_first_last_send_progress()
@ -357,7 +384,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
         c = Correspondent.objects.create(name="test")

         document = self.consumer.try_consume_file(
-            self.get_test_file(), override_correspondent_id=c.pk
+            self.get_test_file(),
+            override_correspondent_id=c.pk,
         )
         self.assertEqual(document.correspondent.id, c.id)
         self._assert_first_last_send_progress()
@ -366,7 +394,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
         dt = DocumentType.objects.create(name="test")

         document = self.consumer.try_consume_file(
-            self.get_test_file(), override_document_type_id=dt.pk
+            self.get_test_file(),
+            override_document_type_id=dt.pk,
         )
         self.assertEqual(document.document_type.id, dt.id)
         self._assert_first_last_send_progress()
@ -376,7 +405,8 @@ class TestConsumer(DirectoriesMixin, TestCase):
         t2 = Tag.objects.create(name="t2")
         t3 = Tag.objects.create(name="t3")
         document = self.consumer.try_consume_file(
-            self.get_test_file(), override_tag_ids=[t1.id, t3.id]
+            self.get_test_file(),
+            override_tag_ids=[t1.id, t3.id],
         )

         self.assertIn(t1, document.tags.all())
@ -446,7 +476,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
                     "mime_types": {"application/pdf": ".pdf"},
                     "weight": 0,
                 },
-            )
+            ),
         ]

         self.assertRaisesMessage(
@ -595,16 +625,16 @@ class TestConsumer(DirectoriesMixin, TestCase):
                     "mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
                     "weight": 0,
                 },
-            )
+            ),
         ]
         doc1 = self.consumer.try_consume_file(
-            os.path.join(settings.CONSUMPTION_DIR, "simple.png")
+            os.path.join(settings.CONSUMPTION_DIR, "simple.png"),
         )
         doc2 = self.consumer.try_consume_file(
-            os.path.join(settings.CONSUMPTION_DIR, "simple.pdf")
+            os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"),
         )
         doc3 = self.consumer.try_consume_file(
-            os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf")
+            os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"),
         )

         self.assertEqual(doc1.filename, "simple.png")
@ -691,7 +721,9 @@ class PostConsumeTestCase(TestCase):
             with override_settings(POST_CONSUME_SCRIPT=script.name):
                 c = Correspondent.objects.create(name="my_bank")
                 doc = Document.objects.create(
-                    title="Test", mime_type="application/pdf", correspondent=c
+                    title="Test",
+                    mime_type="application/pdf",
+                    correspondent=c,
                 )
                 tag1 = Tag.objects.create(name="a")
                 tag2 = Tag.objects.create(name="b")
@ -5,15 +5,16 @@ from uuid import uuid4

 from dateutil import tz
 from django.conf import settings
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase
 from documents.parsers import parse_date


 class TestDate(TestCase):

     SAMPLE_FILES = os.path.join(
-        os.path.dirname(__file__), "../../paperless_tesseract/tests/samples"
+        os.path.dirname(__file__),
+        "../../paperless_tesseract/tests/samples",
     )
     SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])

@ -111,11 +112,11 @@ class TestDate(TestCase):
     @override_settings(FILENAME_DATE_ORDER="YMD")
     def test_filename_date_parse_invalid(self, *args):
         self.assertIsNone(
-            parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here")
+            parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"),
         )

     @override_settings(
-        IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17))
+        IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)),
     )
     def test_ignored_dates(self, *args):
         text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem " "ipsum"
@ -3,10 +3,12 @@ import tempfile
 from pathlib import Path
 from unittest import mock

-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase
 from django.utils import timezone

-from ..models import Document, Correspondent
+from ..models import Correspondent
+from ..models import Document


 class TestDocument(TestCase):
@ -9,17 +9,19 @@ from unittest import mock

 from django.conf import settings
 from django.db import DatabaseError
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase
 from django.utils import timezone

+from ..file_handling import create_source_path_directory
+from ..file_handling import delete_empty_directories
+from ..file_handling import generate_filename
+from ..file_handling import generate_unique_filename
+from ..models import Correspondent
+from ..models import Document
+from ..models import DocumentType
+from ..models import Tag
 from .utils import DirectoriesMixin
-from ..file_handling import (
-    generate_filename,
-    create_source_path_directory,
-    delete_empty_directories,
-    generate_unique_filename,
-)
-from ..models import Document, Correspondent, Tag, DocumentType


 class TestFileHandling(DirectoriesMixin, TestCase):
@ -34,7 +36,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):

         document.storage_type = Document.STORAGE_TYPE_GPG
         self.assertEqual(
-            generate_filename(document), "{:07d}.pdf.gpg".format(document.pk)
+            generate_filename(document),
+            "{:07d}.pdf.gpg".format(document.pk),
         )

     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@ -75,7 +78,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
         self.assertEqual(
-            os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True
+            os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"),
+            True,
         )

     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
@ -93,7 +97,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):

         # Test source_path
         self.assertEqual(
-            document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf"
+            document.source_path,
+            settings.ORIGINALS_DIR + "/none/none.pdf",
         )

         # Make the folder read- and execute-only (no writing and no renaming)
@ -105,7 +110,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):

         # Check proper handling of files
         self.assertEqual(
-            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True
+            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
+            True,
         )
         self.assertEqual(document.filename, "none/none.pdf")

@ -145,7 +151,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
             # Check proper handling of files
             self.assertTrue(os.path.isfile(document.source_path))
             self.assertEqual(
-                os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True
+                os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
+                True,
             )
             self.assertEqual(document.filename, "none/none.pdf")

@ -167,7 +174,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         pk = document.pk
         document.delete()
         self.assertEqual(
-            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False
+            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
+            False,
         )
         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)

@ -192,7 +200,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none/none.pdf"), False)
         document.delete()
         self.assertEqual(
-            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False
+            os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"),
+            False,
         )
         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
         self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none.pdf"), True)
@ -363,7 +372,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf")

         doc = Document.objects.create(
-            title="doc2", checksum="B", mime_type="application/pdf"
+            title="doc2",
+            checksum="B",
+            mime_type="application/pdf",
         )

         self.assertEqual(generate_filename(doc), "doc2.pdf")
@ -380,12 +391,14 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         )

     @override_settings(
-        PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}"
+        PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}",
     )
     def test_created_year_month_day(self):
         d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1))
         doc1 = Document.objects.create(
-            title="doc1", mime_type="application/pdf", created=d1
+            title="doc1",
+            mime_type="application/pdf",
+            created=d1,
         )

         self.assertEqual(generate_filename(doc1), "2020-03-06.pdf")
@ -395,12 +408,14 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")

     @override_settings(
-        PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}"
+        PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}",
     )
     def test_added_year_month_day(self):
         d1 = timezone.make_aware(datetime.datetime(232, 1, 9, 1, 1, 1))
         doc1 = Document.objects.create(
-            title="doc1", mime_type="application/pdf", added=d1
+            title="doc1",
+            mime_type="application/pdf",
+            added=d1,
         )

         self.assertEqual(generate_filename(doc1), "232-01-09.pdf")
@ -410,7 +425,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")

     @override_settings(
-        PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}"
+        PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}",
     )
     def test_nested_directory_cleanup(self):
         document = Document()
@ -431,7 +446,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         document.delete()

         self.assertEqual(
-            os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False
+            os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"),
+            False,
         )
         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
         self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
@ -456,7 +472,8 @@ class TestFileHandling(DirectoriesMixin, TestCase):
         os.makedirs(os.path.join(tmp, "notempty", "empty"))

         delete_empty_directories(
-            os.path.join(tmp, "notempty", "empty"), root=settings.ORIGINALS_DIR
+            os.path.join(tmp, "notempty", "empty"),
+            root=settings.ORIGINALS_DIR,
         )
         self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
         self.assertEqual(os.path.isfile(os.path.join(tmp, "notempty", "file")), True)
@ -483,10 +500,16 @@ class TestFileHandling(DirectoriesMixin, TestCase):
     @override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
     def test_duplicates(self):
         document = Document.objects.create(
-            mime_type="application/pdf", title="qwe", checksum="A", pk=1
+            mime_type="application/pdf",
+            title="qwe",
+            checksum="A",
+            pk=1,
         )
         document2 = Document.objects.create(
-            mime_type="application/pdf", title="qwe", checksum="B", pk=2
+            mime_type="application/pdf",
+            title="qwe",
+            checksum="B",
+            pk=2,
         )
         Path(document.source_path).touch()
         Path(document2.source_path).touch()
@ -584,10 +607,12 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
         self.assertTrue(os.path.isfile(doc.source_path))
         self.assertTrue(os.path.isfile(doc.archive_path))
         self.assertEqual(
-            doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf")
+            doc.source_path,
+            os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"),
         )
         self.assertEqual(
-            doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")
+            doc.archive_path,
+            os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"),
         )

     @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
@ -851,7 +876,10 @@ class TestFilenameGeneration(TestCase):
     def test_invalid_characters(self):

         doc = Document.objects.create(
-            title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1"
+            title="This. is the title.",
+            mime_type="application/pdf",
+            pk=1,
+            checksum="1",
         )
         self.assertEqual(generate_filename(doc), "This. is the title.pdf")
 | 
					        self.assertEqual(generate_filename(doc), "This. is the title.pdf")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -877,7 +905,9 @@ class TestFilenameGeneration(TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def run():
 | 
					def run():
 | 
				
			||||||
    doc = Document.objects.create(
 | 
					    doc = Document.objects.create(
 | 
				
			||||||
        checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow"
 | 
					        checksum=str(uuid.uuid4()),
 | 
				
			||||||
 | 
					        title=str(uuid.uuid4()),
 | 
				
			||||||
 | 
					        content="wow",
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    doc.filename = generate_unique_filename(doc)
 | 
					    doc.filename = generate_unique_filename(doc)
 | 
				
			||||||
    Path(doc.thumbnail_path).touch()
 | 
					    Path(doc.thumbnail_path).touch()
 | 
				
			||||||
 | 
				
			|||||||

@@ -1,7 +1,7 @@
 from django.core.management.base import CommandError
 from django.test import TestCase
-
 from documents.settings import EXPORTER_FILE_NAME

 from ..management.commands.document_importer import Command

+

@@ -12,7 +12,9 @@ class TestImporter(TestCase):
     def test_check_manifest_exists(self):
         cmd = Command()
         self.assertRaises(
-            CommandError, cmd._check_manifest_exists, "/tmp/manifest.json"
+            CommandError,
+            cmd._check_manifest_exists,
+            "/tmp/manifest.json",
         )

     def test_check_manifest(self):

@@ -26,11 +28,11 @@ class TestImporter(TestCase):
         self.assertTrue("The manifest file contains a record" in str(cm.exception))

         cmd.manifest = [
-            {"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}
+            {"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"},
         ]
         # self.assertRaises(CommandError, cmd._check_manifest)
         with self.assertRaises(CommandError) as cm:
             cmd._check_manifest()
         self.assertTrue(
-            'The manifest file refers to "noexist.pdf"' in str(cm.exception)
+            'The manifest file refers to "noexist.pdf"' in str(cm.exception),
         )

@@ -1,5 +1,4 @@
 from django.test import TestCase
-
 from documents import index
 from documents.models import Document
 from documents.tests.utils import DirectoriesMixin

@@ -9,7 +8,9 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
     def test_auto_complete(self):

         doc1 = Document.objects.create(
-            title="doc1", checksum="A", content="test test2 test3"
+            title="doc1",
+            checksum="A",
+            content="test test2 test3",
         )
         doc2 = Document.objects.create(title="doc2", checksum="B", content="test test2")
         doc3 = Document.objects.create(title="doc3", checksum="C", content="test2")

@@ -21,10 +22,12 @@ class TestAutoComplete(DirectoriesMixin, TestCase):
         ix = index.open_index()

         self.assertListEqual(
-            index.autocomplete(ix, "tes"), [b"test3", b"test", b"test2"]
+            index.autocomplete(ix, "tes"),
+            [b"test3", b"test", b"test2"],
         )
         self.assertListEqual(
-            index.autocomplete(ix, "tes", limit=3), [b"test3", b"test", b"test2"]
+            index.autocomplete(ix, "tes", limit=3),
+            [b"test3", b"test", b"test2"],
         )
         self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
         self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
@ -1,16 +1,14 @@
 | 
				
			|||||||
import hashlib
 | 
					 | 
				
			||||||
import tempfile
 | 
					 | 
				
			||||||
import filecmp
 | 
					import filecmp
 | 
				
			||||||
 | 
					import hashlib
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import shutil
 | 
					import shutil
 | 
				
			||||||
 | 
					import tempfile
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
from unittest import mock
 | 
					from unittest import mock
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from django.test import TestCase, override_settings
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from django.core.management import call_command
 | 
					from django.core.management import call_command
 | 
				
			||||||
 | 
					from django.test import override_settings
 | 
				
			||||||
 | 
					from django.test import TestCase
 | 
				
			||||||
from documents.file_handling import generate_filename
 | 
					from documents.file_handling import generate_filename
 | 
				
			||||||
from documents.management.commands.document_archiver import handle_document
 | 
					from documents.management.commands.document_archiver import handle_document
 | 
				
			||||||
from documents.models import Document
 | 
					from documents.models import Document
 | 
				
			||||||
@ -34,7 +32,8 @@ class TestArchiver(DirectoriesMixin, TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        doc = self.make_models()
 | 
					        doc = self.make_models()
 | 
				
			||||||
        shutil.copy(
 | 
					        shutil.copy(
 | 
				
			||||||
            sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")
 | 
					            sample_file,
 | 
				
			||||||
 | 
					            os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        call_command("document_archiver")
 | 
					        call_command("document_archiver")
 | 
				
			||||||
@ -43,7 +42,8 @@ class TestArchiver(DirectoriesMixin, TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        doc = self.make_models()
 | 
					        doc = self.make_models()
 | 
				
			||||||
        shutil.copy(
 | 
					        shutil.copy(
 | 
				
			||||||
            sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")
 | 
					            sample_file,
 | 
				
			||||||
 | 
					            os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf"),
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        handle_document(doc.pk)
 | 
					        handle_document(doc.pk)
 | 
				
			||||||
@ -90,7 +90,8 @@ class TestArchiver(DirectoriesMixin, TestCase):
 | 
				
			|||||||
        )
 | 
					        )
 | 
				
			||||||
        shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf"))
 | 
					        shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf"))
 | 
				
			||||||
        shutil.copy(
 | 
					        shutil.copy(
 | 
				
			||||||
            sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf")
 | 
					            sample_file,
 | 
				
			||||||
 | 
					            os.path.join(self.dirs.originals_dir, f"document_01.pdf"),
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        handle_document(doc2.pk)
 | 
					        handle_document(doc2.pk)
 | 
				
			||||||
@ -120,7 +121,9 @@ class TestDecryptDocuments(TestCase):
 | 
				
			|||||||
        os.makedirs(thumb_dir, exist_ok=True)
 | 
					        os.makedirs(thumb_dir, exist_ok=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        override_settings(
 | 
					        override_settings(
 | 
				
			||||||
            ORIGINALS_DIR=originals_dir, THUMBNAIL_DIR=thumb_dir, PASSPHRASE="test"
 | 
					            ORIGINALS_DIR=originals_dir,
 | 
				
			||||||
 | 
					            THUMBNAIL_DIR=thumb_dir,
 | 
				
			||||||
 | 
					            PASSPHRASE="test",
 | 
				
			||||||
        ).enable()
 | 
					        ).enable()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc = Document.objects.create(
 | 
					        doc = Document.objects.create(
 | 
				
			||||||
@ -206,7 +209,7 @@ class TestRenamer(DirectoriesMixin, TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class TestCreateClassifier(TestCase):
 | 
					class TestCreateClassifier(TestCase):
 | 
				
			||||||
    @mock.patch(
 | 
					    @mock.patch(
 | 
				
			||||||
        "documents.management.commands.document_create_classifier.train_classifier"
 | 
					        "documents.management.commands.document_create_classifier.train_classifier",
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    def test_create_classifier(self, m):
 | 
					    def test_create_classifier(self, m):
 | 
				
			||||||
        call_command("document_create_classifier")
 | 
					        call_command("document_create_classifier")
 | 
				
			||||||
@ -224,7 +227,10 @@ class TestSanityChecker(DirectoriesMixin, TestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def test_errors(self):
 | 
					    def test_errors(self):
 | 
				
			||||||
        doc = Document.objects.create(
 | 
					        doc = Document.objects.create(
 | 
				
			||||||
            title="test", content="test", filename="test.pdf", checksum="abc"
 | 
					            title="test",
 | 
				
			||||||
 | 
					            content="test",
 | 
				
			||||||
 | 
					            filename="test.pdf",
 | 
				
			||||||
 | 
					            checksum="abc",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        Path(doc.source_path).touch()
 | 
					        Path(doc.source_path).touch()
 | 
				
			||||||
        Path(doc.thumbnail_path).touch()
 | 
					        Path(doc.thumbnail_path).touch()
 | 
				
			||||||
 | 
				
			|||||||

@@ -6,12 +6,13 @@ from time import sleep
 from unittest import mock

 from django.conf import settings
-from django.core.management import call_command, CommandError
-from django.test import override_settings, TransactionTestCase
-
-from documents.models import Tag
+from django.core.management import call_command
+from django.core.management import CommandError
+from django.test import override_settings
+from django.test import TransactionTestCase
 from documents.consumer import ConsumerError
 from documents.management.commands import document_consumer
+from documents.models import Tag
 from documents.tests.utils import DirectoriesMixin



@@ -41,7 +42,7 @@ class ConsumerMixin:
         super(ConsumerMixin, self).setUp()
         self.t = None
         patcher = mock.patch(
-            "documents.management.commands.document_consumer.async_task"
+            "documents.management.commands.document_consumer.async_task",
         )
         self.task_mock = patcher.start()
         self.addCleanup(patcher.stop)

@@ -208,13 +209,16 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
         self.t_start()

         shutil.copy(
-            self.sample_file, os.path.join(self.dirs.consumption_dir, ".DS_STORE")
+            self.sample_file,
+            os.path.join(self.dirs.consumption_dir, ".DS_STORE"),
         )
         shutil.copy(
-            self.sample_file, os.path.join(self.dirs.consumption_dir, "my_file.pdf")
+            self.sample_file,
+            os.path.join(self.dirs.consumption_dir, "my_file.pdf"),
         )
         shutil.copy(
-            self.sample_file, os.path.join(self.dirs.consumption_dir, "._my_file.pdf")
+            self.sample_file,
+            os.path.join(self.dirs.consumption_dir, "._my_file.pdf"),
         )
         shutil.copy(
             self.sample_file,

@@ -258,7 +262,9 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):


 @override_settings(
-    CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=3, CONSUMER_POLLING_RETRY_COUNT=20
+    CONSUMER_POLLING=1,
+    CONSUMER_POLLING_DELAY=3,
+    CONSUMER_POLLING_RETRY_COUNT=20,
 )
 class TestConsumerPolling(TestConsumer):
     # just do all the tests with polling

@@ -319,7 +325,9 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
         self.assertCountEqual(kwargs["override_tag_ids"], tag_ids)

     @override_settings(
-        CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20
+        CONSUMER_POLLING=1,
+        CONSUMER_POLLING_DELAY=1,
+        CONSUMER_POLLING_RETRY_COUNT=20,
     )
     def test_consume_file_with_path_tags_polling(self):
         self.test_consume_file_with_path_tags()

@@ -7,13 +7,17 @@ from pathlib import Path
 from unittest import mock

 from django.core.management import call_command
-from django.test import TestCase, override_settings
-
+from django.test import override_settings
+from django.test import TestCase
 from documents.management.commands import document_exporter
-from documents.models import Document, Tag, DocumentType, Correspondent
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
 from documents.sanity_checker import check_sanity
 from documents.settings import EXPORTER_FILE_NAME
-from documents.tests.utils import DirectoriesMixin, paperless_environment
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import paperless_environment


 class TestExportImport(DirectoriesMixin, TestCase):

@@ -66,8 +70,9 @@ class TestExportImport(DirectoriesMixin, TestCase):
     def _get_document_from_manifest(self, manifest, id):
         f = list(
             filter(
-                lambda d: d["model"] == "documents.document" and d["pk"] == id, manifest
-            )
+                lambda d: d["model"] == "documents.document" and d["pk"] == id,
+                manifest,
+            ),
         )
         if len(f) == 1:
             return f[0]

@@ -76,7 +81,10 @@ class TestExportImport(DirectoriesMixin, TestCase):

     @override_settings(PASSPHRASE="test")
     def _do_export(
-        self, use_filename_format=False, compare_checksums=False, delete=False
+        self,
+        use_filename_format=False,
+        compare_checksums=False,
+        delete=False,
     ):
         args = ["document_exporter", self.target]
         if use_filename_format:

@@ -104,7 +112,8 @@ class TestExportImport(DirectoriesMixin, TestCase):

         self.assertEqual(len(manifest), 8)
         self.assertEqual(
-            len(list(filter(lambda e: e["model"] == "documents.document", manifest))), 4
+            len(list(filter(lambda e: e["model"] == "documents.document", manifest))),
+            4,
         )

         self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))

@@ -129,7 +138,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
         for element in manifest:
             if element["model"] == "documents.document":
                 fname = os.path.join(
-                    self.target, element[document_exporter.EXPORTER_FILE_NAME]
+                    self.target,
+                    element[document_exporter.EXPORTER_FILE_NAME],
                 )
                 self.assertTrue(os.path.exists(fname))
                 self.assertTrue(

@@ -137,8 +147,8 @@ class TestExportImport(DirectoriesMixin, TestCase):
                         os.path.join(
                             self.target,
                             element[document_exporter.EXPORTER_THUMBNAIL_NAME],
-                        )
-                    )
+                        ),
+                    ),
                 )

                 with open(fname, "rb") as f:

@@ -146,12 +156,14 @@ class TestExportImport(DirectoriesMixin, TestCase):
                 self.assertEqual(checksum, element["fields"]["checksum"])

                 self.assertEqual(
-                    element["fields"]["storage_type"], Document.STORAGE_TYPE_UNENCRYPTED
+                    element["fields"]["storage_type"],
+                    Document.STORAGE_TYPE_UNENCRYPTED,
                 )

                 if document_exporter.EXPORTER_ARCHIVE_NAME in element:
                     fname = os.path.join(
-                        self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME]
+                        self.target,
+                        element[document_exporter.EXPORTER_ARCHIVE_NAME],
                     )
                     self.assertTrue(os.path.exists(fname))


@@ -188,7 +200,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         )

         with override_settings(
-            PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}"
+            PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}",
         ):
             self.test_exporter(use_filename_format=True)


@@ -205,7 +217,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime

         with mock.patch(
-            "documents.management.commands.document_exporter.shutil.copy2"
+            "documents.management.commands.document_exporter.shutil.copy2",
         ) as m:
             self._do_export()
             m.assert_not_called()

@@ -216,7 +228,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         Path(self.d1.source_path).touch()

         with mock.patch(
-            "documents.management.commands.document_exporter.shutil.copy2"
+            "documents.management.commands.document_exporter.shutil.copy2",
         ) as m:
             self._do_export()
             self.assertEqual(m.call_count, 1)

@@ -239,7 +251,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))

         with mock.patch(
-            "documents.management.commands.document_exporter.shutil.copy2"
+            "documents.management.commands.document_exporter.shutil.copy2",
         ) as m:
             self._do_export()
             m.assert_not_called()

@@ -250,7 +262,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         self.d2.save()

         with mock.patch(
-            "documents.management.commands.document_exporter.shutil.copy2"
+            "documents.management.commands.document_exporter.shutil.copy2",
         ) as m:
             self._do_export(compare_checksums=True)
             self.assertEqual(m.call_count, 1)

@@ -270,26 +282,29 @@ class TestExportImport(DirectoriesMixin, TestCase):
         doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
         self.assertTrue(
             os.path.isfile(
-                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
-            )
+                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]),
+            ),
         )
         self.d3.delete()

         manifest = self._do_export()
         self.assertRaises(
-            ValueError, self._get_document_from_manifest, manifest, self.d3.id
+            ValueError,
+            self._get_document_from_manifest,
+            manifest,
+            self.d3.id,
         )
         self.assertTrue(
             os.path.isfile(
-                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
-            )
+                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]),
+            ),
         )

         manifest = self._do_export(delete=True)
         self.assertFalse(
             os.path.isfile(
-                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
-            )
+                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME]),
+            ),
         )

         self.assertTrue(len(manifest), 6)

@@ -316,7 +331,7 @@ class TestExportImport(DirectoriesMixin, TestCase):
         self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
         self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none.pdf")))
         self.assertTrue(
-            os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf"))
+            os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf")),
         )

     def test_export_missing_files(self):

@@ -1,35 +1,50 @@
 from django.core.management import call_command
 from django.test import TestCase
-
-from documents.models import Document, Tag, Correspondent, DocumentType
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
 from documents.tests.utils import DirectoriesMixin


 class TestRetagger(DirectoriesMixin, TestCase):
     def make_models(self):
         self.d1 = Document.objects.create(
-            checksum="A", title="A", content="first document"
+            checksum="A",
+            title="A",
+            content="first document",
         )
         self.d2 = Document.objects.create(
-            checksum="B", title="B", content="second document"
+            checksum="B",
+            title="B",
+            content="second document",
         )
         self.d3 = Document.objects.create(
-            checksum="C", title="C", content="unrelated document"
+            checksum="C",
+            title="C",
+            content="unrelated document",
         )
         self.d4 = Document.objects.create(
-            checksum="D", title="D", content="auto document"
+            checksum="D",
+            title="D",
+            content="auto document",
         )

         self.tag_first = Tag.objects.create(
-            name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY
+            name="tag1",
+            match="first",
+            matching_algorithm=Tag.MATCH_ANY,
         )
         self.tag_second = Tag.objects.create(
-            name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY
+            name="tag2",
+            match="second",
+            matching_algorithm=Tag.MATCH_ANY,
         )
         self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
         self.tag_no_match = Tag.objects.create(name="test2")
         self.tag_auto = Tag.objects.create(
-            name="tagauto", matching_algorithm=Tag.MATCH_AUTO
+            name="tagauto",
+            matching_algorithm=Tag.MATCH_AUTO,
         )

         self.d3.tags.add(self.tag_inbox)

@@ -37,17 +52,25 @@ class TestRetagger(DirectoriesMixin, TestCase):
         self.d4.tags.add(self.tag_auto)

         self.correspondent_first = Correspondent.objects.create(
-            name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY
+            name="c1",
+            match="first",
+            matching_algorithm=Correspondent.MATCH_ANY,
         )
         self.correspondent_second = Correspondent.objects.create(
-            name="c2", match="second", matching_algorithm=Correspondent.MATCH_ANY
+            name="c2",
+            match="second",
+            matching_algorithm=Correspondent.MATCH_ANY,
         )

         self.doctype_first = DocumentType.objects.create(
-            name="dt1", match="first", matching_algorithm=DocumentType.MATCH_ANY
+            name="dt1",
+            match="first",
+            matching_algorithm=DocumentType.MATCH_ANY,
         )
         self.doctype_second = DocumentType.objects.create(
-            name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY
+            name="dt2",
+            match="second",
+            matching_algorithm=DocumentType.MATCH_ANY,
         )

     def get_updated_docs(self):

@@ -98,10 +121,12 @@ class TestRetagger(DirectoriesMixin, TestCase):
         self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))

         self.assertCountEqual(
-            [tag.id for tag in d_first.tags.all()], [self.tag_first.id]
+            [tag.id for tag in d_first.tags.all()],
+            [self.tag_first.id],
         )
         self.assertCountEqual(
-            [tag.id for tag in d_second.tags.all()], [self.tag_second.id]
+            [tag.id for tag in d_second.tags.all()],
+            [self.tag_second.id],
         )
         self.assertCountEqual(
             [tag.id for tag in d_unrelated.tags.all()],

@@ -133,7 +158,10 @@ class TestRetagger(DirectoriesMixin, TestCase):

     def test_add_tags_suggest_url(self):
         call_command(
-            "document_retagger", "--tags", "--suggest", "--base-url=http://localhost"
+            "document_retagger",
+            "--tags",
+            "--suggest",
+            "--base-url=http://localhost",
         )
         d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()


@@ -5,9 +5,11 @@ from unittest import mock
 from django.contrib.auth.models import User
 from django.core.management import call_command
 from django.test import TestCase
-
 from documents.management.commands.document_thumbnails import _process_document
-from documents.models import Document, Tag, Correspondent, DocumentType
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
 from documents.tests.utils import DirectoriesMixin



@@ -4,9 +4,11 @@ from unittest import mock

 from django.core.management import call_command
 from django.test import TestCase
-
 from documents.management.commands.document_thumbnails import _process_document
-from documents.models import Document, Tag, Correspondent, DocumentType
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
 from documents.tests.utils import DirectoriesMixin



@@ -4,10 +4,14 @@ from random import randint

 from django.contrib.admin.models import LogEntry
 from django.contrib.auth.models import User
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase

 from .. import matching
-from ..models import Correspondent, Document, Tag, DocumentType
+from ..models import Correspondent
+from ..models import Document
+from ..models import DocumentType
+from ..models import Tag
 from ..signals import document_consumption_finished



@@ -209,7 +213,8 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
         TestCase.setUp(self)
         User.objects.create_user(username="test_consumer", password="12345")
         self.doc_contains = Document.objects.create(
-            content="I contain the keyword.", mime_type="application/pdf"
+            content="I contain the keyword.",
+            mime_type="application/pdf",
         )

         self.index_dir = tempfile.mkdtemp()

@@ -221,43 +226,56 @@ class TestDocumentConsumptionFinishedSignal(TestCase):

     def test_tag_applied_any(self):
         t1 = Tag.objects.create(
-            name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY
+            name="test",
+            match="keyword",
+            matching_algorithm=Tag.MATCH_ANY,
         )
         document_consumption_finished.send(
-            sender=self.__class__, document=self.doc_contains
+            sender=self.__class__,
+            document=self.doc_contains,
         )
         self.assertTrue(list(self.doc_contains.tags.all()) == [t1])

     def test_tag_not_applied(self):
         Tag.objects.create(
-            name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY
+            name="test",
+            match="no-match",
+            matching_algorithm=Tag.MATCH_ANY,
         )
         document_consumption_finished.send(
-            sender=self.__class__, document=self.doc_contains
+            sender=self.__class__,
+            document=self.doc_contains,
         )
         self.assertTrue(list(self.doc_contains.tags.all()) == [])

     def test_correspondent_applied(self):
         correspondent = Correspondent.objects.create(
-            name="test", match="keyword", matching_algorithm=Correspondent.MATCH_ANY
+            name="test",
+            match="keyword",
+            matching_algorithm=Correspondent.MATCH_ANY,
         )
         document_consumption_finished.send(
-            sender=self.__class__, document=self.doc_contains
+            sender=self.__class__,
+            document=self.doc_contains,
         )
         self.assertTrue(self.doc_contains.correspondent == correspondent)

     def test_correspondent_not_applied(self):
         Tag.objects.create(
-            name="test", match="no-match", matching_algorithm=Correspondent.MATCH_ANY
+            name="test",
+            match="no-match",
+            matching_algorithm=Correspondent.MATCH_ANY,
         )
         document_consumption_finished.send(
-            sender=self.__class__, document=self.doc_contains
+            sender=self.__class__,
+            document=self.doc_contains,
         )
         self.assertEqual(self.doc_contains.correspondent, None)

     def test_logentry_created(self):
         document_consumption_finished.send(
-            sender=self.__class__, document=self.doc_contains
+            sender=self.__class__,
+            document=self.doc_contains,
         )

         self.assertEqual(LogEntry.objects.count(), 1)
@ -6,9 +6,9 @@ from unittest import mock
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from django.conf import settings
 | 
					from django.conf import settings
 | 
				
			||||||
from django.test import override_settings
 | 
					from django.test import override_settings
 | 
				
			||||||
 | 
					 | 
				
			||||||
from documents.parsers import ParseError
 | 
					from documents.parsers import ParseError
 | 
				
			||||||
from documents.tests.utils import DirectoriesMixin, TestMigrations
 | 
					from documents.tests.utils import DirectoriesMixin
 | 
				
			||||||
 | 
					from documents.tests.utils import TestMigrations
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
STORAGE_TYPE_GPG = "gpg"
 | 
					STORAGE_TYPE_GPG = "gpg"
 | 
				
			||||||
@ -93,10 +93,18 @@ def make_test_document(
 | 
				
			|||||||
simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg")
 | 
					simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg")
 | 
				
			||||||
simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
 | 
					simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
 | 
				
			||||||
simple_pdf2 = os.path.join(
 | 
					simple_pdf2 = os.path.join(
 | 
				
			||||||
    os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf"
 | 
					    os.path.dirname(__file__),
 | 
				
			||||||
 | 
					    "samples",
 | 
				
			||||||
 | 
					    "documents",
 | 
				
			||||||
 | 
					    "originals",
 | 
				
			||||||
 | 
					    "0000002.pdf",
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
simple_pdf3 = os.path.join(
 | 
					simple_pdf3 = os.path.join(
 | 
				
			||||||
    os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf"
 | 
					    os.path.dirname(__file__),
 | 
				
			||||||
 | 
					    "samples",
 | 
				
			||||||
 | 
					    "documents",
 | 
				
			||||||
 | 
					    "originals",
 | 
				
			||||||
 | 
					    "0000003.pdf",
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt")
 | 
					simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt")
 | 
				
			||||||
simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png")
 | 
					simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png")
 | 
				
			||||||
@@ -121,19 +129,43 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
             simple_pdf,
         )
         self.no_text = make_test_document(
-            Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf
+            Document,
+            "no-text",
+            "image/png",
+            simple_png2,
+            "no-text.png",
+            simple_pdf,
         )
         self.doc_no_archive = make_test_document(
-            Document, "no_archive", "text/plain", simple_txt, "no_archive.txt"
+            Document,
+            "no_archive",
+            "text/plain",
+            simple_txt,
+            "no_archive.txt",
         )
         self.clash1 = make_test_document(
-            Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf
+            Document,
+            "clash",
+            "application/pdf",
+            simple_pdf,
+            "clash.pdf",
+            simple_pdf,
         )
         self.clash2 = make_test_document(
-            Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf
+            Document,
+            "clash",
+            "image/jpeg",
+            simple_jpg,
+            "clash.jpg",
+            simple_pdf,
         )
         self.clash3 = make_test_document(
-            Document, "clash", "image/png", simple_png, "clash.png", simple_pdf
+            Document,
+            "clash",
+            "image/png",
+            simple_png,
+            "clash.png",
+            simple_pdf,
         )
         self.clash4 = make_test_document(
             Document,
@@ -147,7 +179,8 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
         self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2))
         self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3))
         self.assertNotEqual(
-            archive_path_old(self.clash1), archive_path_old(self.clash4)
+            archive_path_old(self.clash1),
+            archive_path_old(self.clash4),
         )

     def testArchiveFilesMigrated(self):
@@ -171,19 +204,23 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
                 self.assertEqual(archive_checksum, doc.archive_checksum)

         self.assertEqual(
-            Document.objects.filter(archive_checksum__isnull=False).count(), 6
+            Document.objects.filter(archive_checksum__isnull=False).count(),
+            6,
         )

     def test_filenames(self):
         Document = self.apps.get_model("documents", "Document")
         self.assertEqual(
-            Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf"
+            Document.objects.get(id=self.unrelated.id).archive_filename,
+            "unrelated.pdf",
         )
         self.assertEqual(
-            Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf"
+            Document.objects.get(id=self.no_text.id).archive_filename,
+            "no-text.pdf",
         )
         self.assertEqual(
-            Document.objects.get(id=self.doc_no_archive.id).archive_filename, None
+            Document.objects.get(id=self.doc_no_archive.id).archive_filename,
+            None,
         )
         self.assertEqual(
             Document.objects.get(id=self.clash1.id).archive_filename,
@@ -198,7 +235,8 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
             f"{self.clash3.id:07}.pdf",
         )
         self.assertEqual(
-            Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf"
+            Document.objects.get(id=self.clash4.id).archive_filename,
+            "clash.png.pdf",
         )


@@ -207,16 +245,20 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
     def test_filenames(self):
         Document = self.apps.get_model("documents", "Document")
         self.assertEqual(
-            Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf"
+            Document.objects.get(id=self.unrelated.id).archive_filename,
+            "unrelated.pdf",
         )
         self.assertEqual(
-            Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf"
+            Document.objects.get(id=self.no_text.id).archive_filename,
+            "no-text.pdf",
         )
         self.assertEqual(
-            Document.objects.get(id=self.doc_no_archive.id).archive_filename, None
+            Document.objects.get(id=self.doc_no_archive.id).archive_filename,
+            None,
         )
         self.assertEqual(
-            Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf"
+            Document.objects.get(id=self.clash1.id).archive_filename,
+            "none/clash.pdf",
         )
         self.assertEqual(
             Document.objects.get(id=self.clash2.id).archive_filename,
@@ -227,7 +269,8 @@ class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
             "none/clash_02.pdf",
         )
         self.assertEqual(
-            Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf"
+            Document.objects.get(id=self.clash4.id).archive_filename,
+            "clash.png.pdf",
         )


@@ -248,12 +291,19 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
         Document = self.apps.get_model("documents", "Document")

         doc = make_test_document(
-            Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf
+            Document,
+            "clash",
+            "application/pdf",
+            simple_pdf,
+            "clash.pdf",
+            simple_pdf,
         )
         os.unlink(archive_path_old(doc))

         self.assertRaisesMessage(
-            ValueError, "does not exist at: ", self.performMigration
+            ValueError,
+            "does not exist at: ",
+            self.performMigration,
         )

     def test_parser_missing(self):
@@ -277,7 +327,9 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
         )

         self.assertRaisesMessage(
-            ValueError, "no parsers are available", self.performMigration
+            ValueError,
+            "no parsers are available",
+            self.performMigration,
         )

     @mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
@@ -286,7 +338,12 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
         Document = self.apps.get_model("documents", "Document")

         doc1 = make_test_document(
-            Document, "document", "image/png", simple_png, "document.png", simple_pdf
+            Document,
+            "document",
+            "image/png",
+            simple_png,
+            "document.png",
+            simple_pdf,
         )
         doc2 = make_test_document(
             Document,
@@ -311,8 +368,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
                     filter(
                         lambda log: "Parse error, will try again in 5 seconds" in log,
                         capture.output,
-                    )
-                )
+                    ),
+                ),
             ),
             4,
         )
@@ -324,8 +381,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
                         lambda log: "Unable to regenerate archive document for ID:"
                         in log,
                         capture.output,
-                    )
-                )
+                    ),
+                ),
             ),
             2,
         )
@@ -347,7 +404,12 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
         Document = self.apps.get_model("documents", "Document")

         doc1 = make_test_document(
-            Document, "document", "image/png", simple_png, "document.png", simple_pdf
+            Document,
+            "document",
+            "image/png",
+            simple_png,
+            "document.png",
+            simple_pdf,
         )
         doc2 = make_test_document(
             Document,
@@ -368,8 +430,8 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
                         lambda log: "Parser did not return an archive document for document"
                         in log,
                         capture.output,
-                    )
-                )
+                    ),
+                ),
             ),
             2,
         )
@@ -405,7 +467,11 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
             "unrelated.pdf",
         )
         doc_no_archive = make_test_document(
-            Document, "no_archive", "text/plain", simple_txt, "no_archive.txt"
+            Document,
+            "no_archive",
+            "text/plain",
+            simple_txt,
+            "no_archive.txt",
         )
         clashB = make_test_document(
             Document,
@@ -434,13 +500,14 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
                 self.assertEqual(archive_checksum, doc.archive_checksum)

         self.assertEqual(
-            Document.objects.filter(archive_checksum__isnull=False).count(), 2
+            Document.objects.filter(archive_checksum__isnull=False).count(),
+            2,
         )


 @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
 class TestMigrateArchiveFilesBackwardsWithFilenameFormat(
-    TestMigrateArchiveFilesBackwards
+    TestMigrateArchiveFilesBackwards,
 ):
     pass

@@ -505,5 +572,7 @@ class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
         )

         self.assertRaisesMessage(
-            ValueError, "file already exists.", self.performMigration
+            ValueError,
+            "file already exists.",
+            self.performMigration,
         )
@@ -3,9 +3,9 @@ import shutil

 from django.conf import settings
 from django.test import override_settings
-
 from documents.parsers import get_default_file_extension
-from documents.tests.utils import DirectoriesMixin, TestMigrations
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import TestMigrations

 STORAGE_TYPE_UNENCRYPTED = "unencrypted"
 STORAGE_TYPE_GPG = "gpg"
@@ -46,7 +46,9 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
     def setUpBeforeMigration(self, apps):
         Document = apps.get_model("documents", "Document")
         doc = Document.objects.create(
-            title="test", file_type="pdf", filename="file1.pdf"
+            title="test",
+            file_type="pdf",
+            filename="file1.pdf",
         )
         self.doc_id = doc.id
         shutil.copy(
@@ -55,7 +57,9 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
         )

         doc2 = Document.objects.create(
-            checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG
+            checksum="B",
+            file_type="pdf",
+            storage_type=STORAGE_TYPE_GPG,
         )
         self.doc2_id = doc2.id
         shutil.copy(
@@ -88,7 +92,9 @@ class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations):
     def setUpBeforeMigration(self, apps):
         Document = apps.get_model("documents", "Document")
         doc = Document.objects.create(
-            title="test", mime_type="application/pdf", filename="file1.pdf"
+            title="test",
+            mime_type="application/pdf",
+            filename="file1.pdf",
         )
         self.doc_id = doc.id
         shutil.copy(
@@ -1,4 +1,5 @@
-from documents.tests.utils import DirectoriesMixin, TestMigrations
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import TestMigrations


 class TestMigrateNullCharacters(DirectoriesMixin, TestMigrations):
@@ -1,4 +1,5 @@
-from documents.tests.utils import DirectoriesMixin, TestMigrations
+from documents.tests.utils import DirectoriesMixin
+from documents.tests.utils import TestMigrations


 class TestMigrateTagColor(DirectoriesMixin, TestMigrations):
@@ -1,7 +1,9 @@
 from django.test import TestCase

-from .factories import DocumentFactory, CorrespondentFactory
-from ..models import Document, Correspondent
+from ..models import Correspondent
+from ..models import Document
+from .factories import CorrespondentFactory
+from .factories import DocumentFactory


 class CorrespondentTestCase(TestCase):
@@ -4,16 +4,14 @@ import tempfile
 from tempfile import TemporaryDirectory
 from unittest import mock

-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase
-from documents.parsers import (
-    get_parser_class,
-    get_supported_file_extensions,
-    get_default_file_extension,
-    get_parser_class_for_mime_type,
-    DocumentParser,
-    is_file_ext_supported,
-)
+from documents.parsers import DocumentParser
+from documents.parsers import get_default_file_extension
+from documents.parsers import get_parser_class
+from documents.parsers import get_parser_class_for_mime_type
+from documents.parsers import get_supported_file_extensions
+from documents.parsers import is_file_ext_supported
 from paperless_tesseract.parsers import RasterisedDocumentParser
 from paperless_text.parsers import TextDocumentParser

@@ -6,9 +6,9 @@ from pathlib import Path
 import filelock
 from django.conf import settings
 from django.test import TestCase
-
 from documents.models import Document
-from documents.sanity_checker import check_sanity, SanityCheckMessages
+from documents.sanity_checker import check_sanity
+from documents.sanity_checker import SanityCheckMessages
 from documents.tests.utils import DirectoriesMixin


@@ -23,7 +23,8 @@ class TestSanityCheckMessages(TestCase):
             self.assertEqual(len(capture.output), 1)
             self.assertEqual(capture.records[0].levelno, logging.INFO)
             self.assertEqual(
-                capture.records[0].message, "Sanity checker detected no issues."
+                capture.records[0].message,
+                "Sanity checker detected no issues.",
             )

     def test_info(self):
@@ -2,8 +2,8 @@ import logging
 from unittest import mock

 from django.test import TestCase
-from paperless.settings import default_task_workers, default_threads_per_worker
+from paperless.settings import default_task_workers
+from paperless.settings import default_threads_per_worker


 class TestSettings(TestCase):
@@ -21,7 +21,7 @@ class TestSettings(TestCase):
     def test_workers_threads(self):
         for i in range(1, 64):
             with mock.patch(
-                "paperless.settings.multiprocessing.cpu_count"
+                "paperless.settings.multiprocessing.cpu_count",
             ) as cpu_count:
                 cpu_count.return_value = i

@@ -4,10 +4,13 @@ from unittest import mock
 from django.conf import settings
 from django.test import TestCase
 from django.utils import timezone
-
 from documents import tasks
-from documents.models import Document, Tag, Correspondent, DocumentType
-from documents.sanity_checker import SanityCheckMessages, SanityCheckFailedException
+from documents.models import Correspondent
+from documents.models import Document
+from documents.models import DocumentType
+from documents.models import Tag
+from documents.sanity_checker import SanityCheckFailedException
+from documents.sanity_checker import SanityCheckMessages
 from documents.tests.utils import DirectoriesMixin


@@ -106,7 +109,8 @@ class TestTasks(DirectoriesMixin, TestCase):
         messages.warning("Some warning")
         m.return_value = messages
         self.assertEqual(
-            tasks.sanity_check(), "Sanity check exited with warnings. See log."
+            tasks.sanity_check(),
+            "Sanity check exited with warnings. See log.",
         )
         m.assert_called_once()

@@ -116,7 +120,8 @@ class TestTasks(DirectoriesMixin, TestCase):
         messages.info("Some info")
         m.return_value = messages
         self.assertEqual(
-            tasks.sanity_check(), "Sanity check exited with infos. See log."
+            tasks.sanity_check(),
+            "Sanity check exited with infos. See log.",
         )
         m.assert_called_once()

@@ -25,7 +25,7 @@ class TestViews(TestCase):
         ]:
             if language_given:
                 self.client.cookies.load(
-                    {settings.LANGUAGE_COOKIE_NAME: language_given}
+                    {settings.LANGUAGE_COOKIE_NAME: language_given},
                 )
             elif settings.LANGUAGE_COOKIE_NAME in self.client.cookies.keys():
                 self.client.cookies.pop(settings.LANGUAGE_COOKIE_NAME)
@@ -51,5 +51,6 @@ class TestViews(TestCase):
                 f"frontend/{language_actual}/polyfills.js",
             )
             self.assertEqual(
-                response.context_data["main_js"], f"frontend/{language_actual}/main.js"
+                response.context_data["main_js"],
+                f"frontend/{language_actual}/main.js",
             )
@@ -7,7 +7,8 @@ from contextlib import contextmanager
 from django.apps import apps
 from django.db import connection
 from django.db.migrations.executor import MigrationExecutor
-from django.test import override_settings, TransactionTestCase
+from django.test import override_settings
+from django.test import TransactionTestCase


 def setup_directories():
@@ -97,7 +98,7 @@ class TestMigrations(TransactionTestCase):
         assert (
             self.migrate_from and self.migrate_to
         ), "TestCase '{}' must define migrate_from and migrate_to     properties".format(
-            type(self).__name__
+            type(self).__name__,
         )
         self.migrate_from = [(self.app, self.migrate_from)]
         self.migrate_to = [(self.app, self.migrate_to)]
@@ -5,63 +5,70 @@ import uuid
 import zipfile
 from datetime import datetime
 from time import mktime
-from urllib.parse import quote_plus
 from unicodedata import normalize
+from urllib.parse import quote_plus

 from django.conf import settings
-from django.db.models import Count, Max, Case, When, IntegerField
+from django.db.models import Case
+from django.db.models import Count
+from django.db.models import IntegerField
+from django.db.models import Max
+from django.db.models import When
 from django.db.models.functions import Lower
-from django.http import HttpResponse, HttpResponseBadRequest, Http404
+from django.http import Http404
+from django.http import HttpResponse
+from django.http import HttpResponseBadRequest
 from django.utils.translation import get_language
 from django.views.decorators.cache import cache_control
 from django.views.generic import TemplateView
 from django_filters.rest_framework import DjangoFilterBackend
 from django_q.tasks import async_task
+from paperless.db import GnuPG
+from paperless.views import StandardPagination
 from rest_framework import parsers
 from rest_framework.decorators import action
 from rest_framework.exceptions import NotFound
-from rest_framework.filters import OrderingFilter, SearchFilter
+from rest_framework.filters import OrderingFilter
+from rest_framework.filters import SearchFilter
 from rest_framework.generics import GenericAPIView
-from rest_framework.mixins import (
-    DestroyModelMixin,
-    ListModelMixin,
-    RetrieveModelMixin,
-    UpdateModelMixin,
-)
+from rest_framework.mixins import DestroyModelMixin
+from rest_framework.mixins import ListModelMixin
+from rest_framework.mixins import RetrieveModelMixin
+from rest_framework.mixins import UpdateModelMixin
 from rest_framework.permissions import IsAuthenticated
 from rest_framework.response import Response
 from rest_framework.views import APIView
-from rest_framework.viewsets import GenericViewSet, ModelViewSet, ViewSet
+from rest_framework.viewsets import GenericViewSet
+from rest_framework.viewsets import ModelViewSet
+from rest_framework.viewsets import ViewSet

-from paperless.db import GnuPG
-from paperless.views import StandardPagination
-from .bulk_download import (
-    OriginalAndArchiveStrategy,
-    OriginalsOnlyStrategy,
-    ArchiveOnlyStrategy,
-)
+from .bulk_download import ArchiveOnlyStrategy
+from .bulk_download import OriginalAndArchiveStrategy
+from .bulk_download import OriginalsOnlyStrategy
 from .classifier import load_classifier
-from .filters import (
-    CorrespondentFilterSet,
-    DocumentFilterSet,
-    TagFilterSet,
-    DocumentTypeFilterSet,
-)
+from .filters import CorrespondentFilterSet
+from .filters import DocumentFilterSet
+from .filters import DocumentTypeFilterSet
+from .filters import TagFilterSet
-from .matching import match_correspondents, match_tags, match_document_types
+from .matching import match_correspondents
+from .matching import match_document_types
+from .matching import match_tags
-from .models import Correspondent, Document, Tag, DocumentType, SavedView
+from .models import Correspondent
+from .models import Document
+from .models import DocumentType
+from .models import SavedView
+from .models import Tag
 from .parsers import get_parser_class_for_mime_type
-from .serialisers import (
-    CorrespondentSerializer,
-    DocumentSerializer,
-    TagSerializerVersion1,
-    TagSerializer,
-    DocumentTypeSerializer,
-    PostDocumentSerializer,
-    SavedViewSerializer,
-    BulkEditSerializer,
-    DocumentListSerializer,
-    BulkDownloadSerializer,
-)
+from .serialisers import BulkDownloadSerializer
+from .serialisers import BulkEditSerializer
+from .serialisers import CorrespondentSerializer
+from .serialisers import DocumentListSerializer
+from .serialisers import DocumentSerializer
+from .serialisers import DocumentTypeSerializer
+from .serialisers import PostDocumentSerializer
+from .serialisers import SavedViewSerializer
+from .serialisers import TagSerializer
+from .serialisers import TagSerializerVersion1

 logger = logging.getLogger("paperless.api")

@@ -89,16 +96,14 @@ class IndexView(TemplateView):
         context["full_name"] = self.request.user.get_full_name()
         context["styles_css"] = f"frontend/{self.get_language()}/styles.css"
         context["runtime_js"] = f"frontend/{self.get_language()}/runtime.js"
-        context[
-            "polyfills_js"
-        ] = f"frontend/{self.get_language()}/polyfills.js"  # NOQA: E501
+        context["polyfills_js"] = f"frontend/{self.get_language()}/polyfills.js"
         context["main_js"] = f"frontend/{self.get_language()}/main.js"
         context[
             "webmanifest"
-        ] = f"frontend/{self.get_language()}/manifest.webmanifest"  # NOQA: E501
+        ] = f"frontend/{self.get_language()}/manifest.webmanifest"  # noqa: E501
         context[
             "apple_touch_icon"
-        ] = f"frontend/{self.get_language()}/apple-touch-icon.png"  # NOQA: E501
+        ] = f"frontend/{self.get_language()}/apple-touch-icon.png"  # noqa: E501
         return context


@@ -106,7 +111,8 @@ class CorrespondentViewSet(ModelViewSet):
     model = Correspondent

     queryset = Correspondent.objects.annotate(
-        document_count=Count("documents"), last_correspondence=Max("documents__created")
+        document_count=Count("documents"),
+        last_correspondence=Max("documents__created"),
     ).order_by(Lower("name"))

     serializer_class = CorrespondentSerializer
@@ -127,7 +133,7 @@ class TagViewSet(ModelViewSet):
     model = Tag

     queryset = Tag.objects.annotate(document_count=Count("documents")).order_by(
-        Lower("name")
+        Lower("name"),
     )

     def get_serializer_class(self):
@@ -147,7 +153,7 @@ class DocumentTypeViewSet(ModelViewSet):
     model = DocumentType

     queryset = DocumentType.objects.annotate(
-        document_count=Count("documents")
+        document_count=Count("documents"),
     ).order_by(Lower("name"))

     serializer_class = DocumentTypeSerializer
@@ -220,9 +226,7 @@ class DocumentViewSet(

     def file_response(self, pk, request, disposition):
         doc = Document.objects.get(id=pk)
-        if (
-            not self.original_requested(request) and doc.has_archive_version
-        ):  # NOQA: E501
+        if not self.original_requested(request) and doc.has_archive_version:
             file_handle = doc.archive_file
             filename = doc.get_public_filename(archive=True)
             mime_type = "application/pdf"
@@ -258,7 +262,7 @@ class DocumentViewSet(

             try:
                 return parser.extract_metadata(file, mime_type)
-            except Exception as e:
+            except Exception:
                 # TODO: cover GPG errors, remove later.
                 return []
         else:
@@ -291,7 +295,8 @@ class DocumentViewSet(
         if doc.has_archive_version:
             meta["archive_size"] = self.get_filesize(doc.archive_path)
             meta["archive_metadata"] = self.get_metadata(
-                doc.archive_path, "application/pdf"
+                doc.archive_path,
+                "application/pdf",
             )
         else:
             meta["archive_size"] = None
@@ -315,7 +320,7 @@ class DocumentViewSet(
                 "document_types": [
                     dt.id for dt in match_document_types(doc, classifier)
                 ],
-            }
+            },
         )

     @action(methods=["get"], detail=True)
@@ -357,7 +362,7 @@ class SearchResultSerializer(DocumentSerializer):
             "score": instance.score,
             "highlights": instance.highlights("content", text=doc.content)
             if doc
-            else None,  # NOQA: E501
+            else None,
             "rank": instance.rank,
         }

@@ -500,7 +505,9 @@ class PostDocumentView(GenericAPIView):
         os.makedirs(settings.SCRATCH_DIR, exist_ok=True)

         with tempfile.NamedTemporaryFile(
-            prefix="paperless-upload-", dir=settings.SCRATCH_DIR, delete=False
+            prefix="paperless-upload-",
+            dir=settings.SCRATCH_DIR,
+            delete=False,
         ) as f:
             f.write(doc_data)
             os.utime(f.name, times=(t, t))
@@ -537,20 +544,20 @@ class SelectionDataView(GenericAPIView):

         correspondents = Correspondent.objects.annotate(
             document_count=Count(
-                Case(When(documents__id__in=ids, then=1), output_field=IntegerField())
-            )
+                Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
+            ),
         )

         tags = Tag.objects.annotate(
             document_count=Count(
-                Case(When(documents__id__in=ids, then=1), output_field=IntegerField())
-            )
+                Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
+            ),
         )

         types = DocumentType.objects.annotate(
             document_count=Count(
-                Case(When(documents__id__in=ids, then=1), output_field=IntegerField())
-            )
+                Case(When(documents__id__in=ids, then=1), output_field=IntegerField()),
+            ),
         )

         r = Response(
@@ -565,7 +572,7 @@ class SelectionDataView(GenericAPIView):
                 "selected_document_types": [
                     {"id": t.id, "document_count": t.document_count} for t in types
                 ],
-            }
+            },
         )

         return r
@@ -612,7 +619,7 @@ class StatisticsView(APIView):
             {
                 "documents_total": documents_total,
                 "documents_inbox": documents_inbox,
-            }
+            },
         )


@@ -632,7 +639,9 @@ class BulkDownloadView(GenericAPIView):

         os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
         temp = tempfile.NamedTemporaryFile(
-            dir=settings.SCRATCH_DIR, suffix="-compressed-archive", delete=False
+            dir=settings.SCRATCH_DIR,
+            suffix="-compressed-archive",
+            delete=False,
         )

         if content == "both":
@@ -651,7 +660,8 @@ class BulkDownloadView(GenericAPIView):
         with open(temp.name, "rb") as f:
             response = HttpResponse(f, content_type="application/zip")
             response["Content-Disposition"] = '{}; filename="{}"'.format(
-                "attachment", "documents.zip"
+                "attachment",
+                "documents.zip",
             )

             return response
@@ -1 +1,4 @@
-from .checks import paths_check, binaries_check
+from .checks import binaries_check
+from .checks import paths_check
+
+__all__ = ["binaries_check", "paths_check"]
@ -9,14 +9,14 @@ from django.core.asgi import get_asgi_application
 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
 django_asgi_app = get_asgi_application()

-from channels.auth import AuthMiddlewareStack  # NOQA: E402
-from channels.routing import ProtocolTypeRouter, URLRouter  # NOQA: E402
+from channels.auth import AuthMiddlewareStack  # noqa: E402
+from channels.routing import ProtocolTypeRouter, URLRouter  # noqa: E402

-from paperless.urls import websocket_urlpatterns  # NOQA: E402
+from paperless.urls import websocket_urlpatterns  # noqa: E402

 application = ProtocolTypeRouter(
     {
         "http": get_asgi_application(),
         "websocket": AuthMiddlewareStack(URLRouter(websocket_urlpatterns)),
-    }
+    },
 )
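The only change to the suppression comments here is the spelling: flake8 treats `# NOQA` and `# noqa` the same, and the cleanup settles on lowercase. `E402` ("module level import not at top of file") stays suppressed because the channels imports have to run after Django is initialised. A minimal sketch of that ordering, assuming Django and channels are installed and a hypothetical `myproject.settings` module exists:

    import os

    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "myproject.settings")

    from django.core.asgi import get_asgi_application

    django_asgi_app = get_asgi_application()  # Django setup must happen first

    from channels.routing import ProtocolTypeRouter  # noqa: E402 -- late import is intentional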
@ -1,9 +1,9 @@
 from django.conf import settings
 from django.contrib import auth
+from django.contrib.auth.middleware import RemoteUserMiddleware
 from django.contrib.auth.models import User
 from django.utils.deprecation import MiddlewareMixin
 from rest_framework import authentication
-from django.contrib.auth.middleware import RemoteUserMiddleware


 class AutoLoginMiddleware(MiddlewareMixin):

@ -25,7 +25,7 @@ class AngularApiAuthenticationOverride(authentication.BaseAuthentication):
             settings.DEBUG
             and "Referer" in request.headers
             and request.headers["Referer"].startswith("http://localhost:4200/")
-        ):  # NOQA: E501
+        ):
             user = User.objects.filter(is_staff=True).first()
             print("Auto-Login with user {}".format(user))
             return (user, None)
@ -3,7 +3,9 @@ import shutil
 import stat

 from django.conf import settings
-from django.core.checks import Error, Warning, register
+from django.core.checks import Error
+from django.core.checks import register
+from django.core.checks import Warning

 exists_message = "{} is set but doesn't exist."
 exists_hint = "Create a directory at {}"

@ -19,11 +21,12 @@ def path_check(var, directory):
     if directory:
         if not os.path.isdir(directory):
             messages.append(
-                Error(exists_message.format(var), exists_hint.format(directory))
+                Error(exists_message.format(var), exists_hint.format(directory)),
             )
         else:
             test_file = os.path.join(
-                directory, f"__paperless_write_test_{os.getpid()}__"
+                directory,
+                f"__paperless_write_test_{os.getpid()}__",
             )
             try:
                 with open(test_file, "w"):

@ -34,9 +37,9 @@ def path_check(var, directory):
                         writeable_message.format(var),
                         writeable_hint.format(
                             f"\n{stat.filemode(os.stat(directory).st_mode)} "
-                            f"{directory}\n"
+                            f"{directory}\n",
                         ),
-                    )
+                    ),
                 )
             finally:
                 if os.path.isfile(test_file):

@ -88,8 +91,8 @@ def debug_mode_check(app_configs, **kwargs):
                 "security issue, since it puts security overides in place which "
                 "are meant to be only used during development. This "
                 "also means that paperless will tell anyone various "
-                "debugging information when something goes wrong."
-            )
+                "debugging information when something goes wrong.",
+            ),
         ]
     else:
         return []
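The import rewrites in this and the surrounding files are what the reorder-python-imports hook produces: a combined `from x import a, b, c` becomes one import statement per name, sorted, so later additions or removals show up as single-line diffs. A small before/after sketch with a made-up module and names:

    # before: one combined import (hypothetical module, purely illustrative)
    from mypkg.helpers import load, save, validate

    # after the hook rewrites it: one name per line
    from mypkg.helpers import load
    from mypkg.helpers import save
    from mypkg.helpers import validate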
@ -1,7 +1,8 @@
 import json

 from asgiref.sync import async_to_sync
-from channels.exceptions import DenyConnection, AcceptConnection
+from channels.exceptions import AcceptConnection
+from channels.exceptions import DenyConnection
 from channels.generic.websocket import WebsocketConsumer


@ -14,13 +15,15 @@ class StatusConsumer(WebsocketConsumer):
             raise DenyConnection()
         else:
             async_to_sync(self.channel_layer.group_add)(
-                "status_updates", self.channel_name
+                "status_updates",
+                self.channel_name,
             )
             raise AcceptConnection()

     def disconnect(self, close_code):
         async_to_sync(self.channel_layer.group_discard)(
-            "status_updates", self.channel_name
+            "status_updates",
+            self.channel_name,
         )

     def status_update(self, event):
@ -1,5 +1,4 @@
 import gnupg
-
 from django.conf import settings


@ -1,5 +1,4 @@
 from django.conf import settings
-
 from paperless import version

@ -5,9 +5,8 @@ import os
 import re

 from concurrent_log_handler.queue import setup_logging_queues
-from dotenv import load_dotenv
-
 from django.utils.translation import gettext_lazy as _
+from dotenv import load_dotenv

 # Tap paperless.conf if it's available
 if os.path.exists("../paperless.conf"):

@ -68,7 +67,8 @@ MODEL_FILE = os.path.join(DATA_DIR, "classification_model.pickle")
 LOGGING_DIR = os.getenv("PAPERLESS_LOGGING_DIR", os.path.join(DATA_DIR, "log"))

 CONSUMPTION_DIR = os.getenv(
-    "PAPERLESS_CONSUMPTION_DIR", os.path.join(BASE_DIR, "..", "consume")
+    "PAPERLESS_CONSUMPTION_DIR",
+    os.path.join(BASE_DIR, "..", "consume"),
 )

 # This will be created if it doesn't exist

@ -119,7 +119,7 @@ REST_FRAMEWORK = {

 if DEBUG:
     REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append(
-        "paperless.auth.AngularApiAuthenticationOverride"
+        "paperless.auth.AngularApiAuthenticationOverride",
     )

 MIDDLEWARE = [

@ -191,7 +191,8 @@ if AUTO_LOGIN_USERNAME:

 ENABLE_HTTP_REMOTE_USER = __get_boolean("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
 HTTP_REMOTE_USER_HEADER_NAME = os.getenv(
-    "PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME", "HTTP_REMOTE_USER"
+    "PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME",
+    "HTTP_REMOTE_USER",
 )

 if ENABLE_HTTP_REMOTE_USER:

@ -201,7 +202,7 @@ if ENABLE_HTTP_REMOTE_USER:
         "django.contrib.auth.backends.ModelBackend",
     ]
     REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append(
-        "rest_framework.authentication.RemoteUserAuthentication"
+        "rest_framework.authentication.RemoteUserAuthentication",
     )

 # X-Frame options for embedded PDF display:

@ -212,7 +213,7 @@ else:

 # We allow CORS from localhost:8080
 CORS_ALLOWED_ORIGINS = tuple(
-    os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(",")
+    os.getenv("PAPERLESS_CORS_ALLOWED_HOSTS", "http://localhost:8000").split(","),
 )

 if DEBUG:

@ -223,7 +224,8 @@ if DEBUG:
 # Paperless on a closed network.  However, if you're putting this anywhere
 # public, you should change the key to something unique and verbose.
 SECRET_KEY = os.getenv(
-    "PAPERLESS_SECRET_KEY", "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee"
+    "PAPERLESS_SECRET_KEY",
+    "e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee",
 )

 _allowed_hosts = os.getenv("PAPERLESS_ALLOWED_HOSTS")

@ -268,7 +270,7 @@ DATABASES = {
     "default": {
         "ENGINE": "django.db.backends.sqlite3",
         "NAME": os.path.join(DATA_DIR, "db.sqlite3"),
-    }
+    },
 }

 if os.getenv("PAPERLESS_DBHOST"):

@ -423,7 +425,8 @@ def default_threads_per_worker(task_workers):


 THREADS_PER_WORKER = os.getenv(
-    "PAPERLESS_THREADS_PER_WORKER", default_threads_per_worker(TASK_WORKERS)
+    "PAPERLESS_THREADS_PER_WORKER",
+    default_threads_per_worker(TASK_WORKERS),
 )

 ###############################################################################

@ -435,7 +438,7 @@ CONSUMER_POLLING = int(os.getenv("PAPERLESS_CONSUMER_POLLING", 0))
 CONSUMER_POLLING_DELAY = int(os.getenv("PAPERLESS_CONSUMER_POLLING_DELAY", 5))

 CONSUMER_POLLING_RETRY_COUNT = int(
-    os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5)
+    os.getenv("PAPERLESS_CONSUMER_POLLING_RETRY_COUNT", 5),
 )

 CONSUMER_DELETE_DUPLICATES = __get_boolean("PAPERLESS_CONSUMER_DELETE_DUPLICATES")

@ -448,8 +451,8 @@ CONSUMER_IGNORE_PATTERNS = list(
         os.getenv(
             "PAPERLESS_CONSUMER_IGNORE_PATTERNS",
             '[".DS_STORE/*", "._*", ".stfolder/*"]',
-        )
-    )
+        ),
+    ),
 )

 CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")

@ -479,7 +482,7 @@ OCR_DESKEW = __get_boolean("PAPERLESS_OCR_DESKEW", "true")
 OCR_ROTATE_PAGES = __get_boolean("PAPERLESS_OCR_ROTATE_PAGES", "true")

 OCR_ROTATE_PAGES_THRESHOLD = float(
-    os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0)
+    os.getenv("PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD", 12.0),
 )

 OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS", "{}")

@ -536,7 +539,8 @@ THUMBNAIL_FONT_NAME = os.getenv(
 PAPERLESS_TIKA_ENABLED = __get_boolean("PAPERLESS_TIKA_ENABLED", "NO")
 PAPERLESS_TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
 PAPERLESS_TIKA_GOTENBERG_ENDPOINT = os.getenv(
-    "PAPERLESS_TIKA_GOTENBERG_ENDPOINT", "http://localhost:3000"
+    "PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
+    "http://localhost:3000",
 )

 if PAPERLESS_TIKA_ENABLED:
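Most of these settings hunks follow one mechanical rule: when a call is wrapped across lines, the add-trailing-comma hook moves each argument to its own line and appends a comma after the last one, and black's "magic trailing comma" then keeps the call in that exploded form on later runs. A self-contained sketch with a made-up setting name, shown only to illustrate the behaviour:

    import os

    BASE_DIR = os.path.dirname(__file__)

    # before: wrapped call, no trailing comma after the last argument
    CONSUME_DIR = os.getenv(
        "MYAPP_CONSUMPTION_DIR", os.path.join(BASE_DIR, "..", "consume")
    )

    # after the hooks run: one argument per line, trailing comma pins the layout
    CONSUME_DIR = os.getenv(
        "MYAPP_CONSUMPTION_DIR",
        os.path.join(BASE_DIR, "..", "consume"),
    )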
@ -1,10 +1,11 @@
 import os
 import shutil

-from django.test import TestCase, override_settings
-
+from django.test import override_settings
+from django.test import TestCase
 from documents.tests.utils import DirectoriesMixin
-from paperless import binaries_check, paths_check
+from paperless import binaries_check
+from paperless import paths_check
 from paperless.checks import debug_mode_check


@ -20,7 +21,9 @@ class TestChecks(DirectoriesMixin, TestCase):
         self.assertEqual(paths_check(None), [])

     @override_settings(
-        MEDIA_ROOT="uuh", DATA_DIR="whatever", CONSUMPTION_DIR="idontcare"
+        MEDIA_ROOT="uuh",
+        DATA_DIR="whatever",
+        CONSUMPTION_DIR="idontcare",
     )
     def test_paths_check_dont_exist(self):
         msgs = paths_check(None)
@ -2,8 +2,8 @@ from unittest import mock

 from channels.layers import get_channel_layer
 from channels.testing import WebsocketCommunicator
-from django.test import TestCase, override_settings
-
+from django.test import override_settings
+from django.test import TestCase
 from paperless.asgi import application


@ -46,7 +46,8 @@ class TestWebSockets(TestCase):

         channel_layer = get_channel_layer()
         await channel_layer.group_send(
-            "status_updates", {"type": "status_update", "data": message}
+            "status_updates",
+            {"type": "status_update", "data": message},
         )

         response = await communicator.receive_json_from()
@ -1,34 +1,30 @@
+from django.conf import settings
 from django.conf.urls import include
 from django.contrib import admin
 from django.contrib.auth.decorators import login_required
-from django.urls import path, re_path
+from django.urls import path
+from django.urls import re_path
+from django.utils.translation import gettext_lazy as _
 from django.views.decorators.csrf import csrf_exempt
 from django.views.generic import RedirectView
+from documents.views import BulkDownloadView
+from documents.views import BulkEditView
+from documents.views import CorrespondentViewSet
+from documents.views import DocumentTypeViewSet
+from documents.views import IndexView
+from documents.views import LogViewSet
+from documents.views import PostDocumentView
+from documents.views import SavedViewViewSet
+from documents.views import SearchAutoCompleteView
+from documents.views import SelectionDataView
+from documents.views import StatisticsView
+from documents.views import TagViewSet
+from documents.views import UnifiedSearchViewSet
+from paperless.consumers import StatusConsumer
+from paperless.views import FaviconView
 from rest_framework.authtoken import views
 from rest_framework.routers import DefaultRouter

-from django.utils.translation import gettext_lazy as _
-
-from django.conf import settings
-
-from paperless.consumers import StatusConsumer
-from documents.views import (
-    CorrespondentViewSet,
-    UnifiedSearchViewSet,
-    LogViewSet,
-    TagViewSet,
-    DocumentTypeViewSet,
-    IndexView,
-    SearchAutoCompleteView,
-    StatisticsView,
-    PostDocumentView,
-    SavedViewViewSet,
-    BulkEditView,
-    SelectionDataView,
-    BulkDownloadView,
-)
-from paperless.views import FaviconView
-
 api_router = DefaultRouter()
 api_router.register(r"correspondents", CorrespondentViewSet)
 api_router.register(r"document_types", DocumentTypeViewSet)

@ -62,7 +58,9 @@ urlpatterns = [
                     name="post_document",
                 ),
                 re_path(
-                    r"^documents/bulk_edit/", BulkEditView.as_view(), name="bulk_edit"
+                    r"^documents/bulk_edit/",
+                    BulkEditView.as_view(),
+                    name="bulk_edit",
                 ),
                 re_path(
                     r"^documents/selection_data/",

@ -76,7 +74,7 @@ urlpatterns = [
                 ),
                 path("token/", views.obtain_auth_token),
             ]
-            + api_router.urls
+            + api_router.urls,
         ),
     ),
     re_path(r"^favicon.ico$", FaviconView.as_view(), name="favicon"),

@ -88,35 +86,37 @@ urlpatterns = [
                 re_path(
                     r"^doc/(?P<pk>\d+)$",
                     RedirectView.as_view(
-                        url=settings.BASE_URL + "api/documents/%(pk)s/download/"
+                        url=settings.BASE_URL + "api/documents/%(pk)s/download/",
                     ),
                 ),
                 re_path(
                     r"^thumb/(?P<pk>\d+)$",
                     RedirectView.as_view(
-                        url=settings.BASE_URL + "api/documents/%(pk)s/thumb/"
+                        url=settings.BASE_URL + "api/documents/%(pk)s/thumb/",
                     ),
                 ),
                 re_path(
                     r"^preview/(?P<pk>\d+)$",
                     RedirectView.as_view(
-                        url=settings.BASE_URL + "api/documents/%(pk)s/preview/"
+                        url=settings.BASE_URL + "api/documents/%(pk)s/preview/",
                     ),
                 ),
-            ]
+            ],
         ),
     ),
     re_path(
         r"^push$",
         csrf_exempt(
-            RedirectView.as_view(url=settings.BASE_URL + "api/documents/post_document/")
+            RedirectView.as_view(
+                url=settings.BASE_URL + "api/documents/post_document/",
+            ),
         ),
     ),
     # Frontend assets TODO: this is pretty bad, but it works.
     path(
         "assets/<path:path>",
         RedirectView.as_view(
-            url=settings.STATIC_URL + "frontend/en-US/assets/%(path)s"
+            url=settings.STATIC_URL + "frontend/en-US/assets/%(path)s",
         ),
     ),
     # TODO: with localization, this is even worse! :/
@ -14,7 +14,11 @@ class StandardPagination(PageNumberPagination):
 class FaviconView(View):
     def get(self, request, *args, **kwargs):
         favicon = os.path.join(
-            os.path.dirname(__file__), "static", "paperless", "img", "favicon.ico"
+            os.path.dirname(__file__),
+            "static",
+            "paperless",
+            "img",
+            "favicon.ico",
         )
         with open(favicon, "rb") as f:
             return HttpResponse(f, content_type="image/x-icon")
@ -1,6 +1,7 @@
 import os
-from uvicorn.workers import UvicornWorker
+
 from django.conf import settings
+from uvicorn.workers import UvicornWorker

 os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings")
@ -6,7 +6,6 @@ It exposes the WSGI callable as a module-level variable named ``application``.
 For more information on this file, see
 https://docs.djangoproject.com/en/1.10/howto/deployment/wsgi/
 """
-
 import os

 from django.core.wsgi import get_wsgi_application
@ -1,8 +1,8 @@
-from django.contrib import admin
 from django import forms
-from paperless_mail.models import MailAccount, MailRule
-
+from django.contrib import admin
 from django.utils.translation import gettext_lazy as _
+from paperless_mail.models import MailAccount
+from paperless_mail.models import MailRule


 class MailAccountAdminForm(forms.ModelForm):

@ -48,7 +48,7 @@ class MailRuleAdmin(admin.ModelAdmin):
             {
                 "description": _(
                     "Paperless will only process mails that match ALL of the "
-                    "filters given below."
+                    "filters given below.",
                 ),
                 "fields": (
                     "filter_from",

@ -66,7 +66,7 @@ class MailRuleAdmin(admin.ModelAdmin):
                 "description": _(
                     "The action applied to the mail. This action is only "
                     "performed when documents were consumed from the mail. "
-                    "Mails without attachments will remain entirely untouched."
+                    "Mails without attachments will remain entirely untouched.",
                 ),
                 "fields": ("action", "action_parameter"),
             },

@ -78,7 +78,7 @@ class MailRuleAdmin(admin.ModelAdmin):
                     "Assign metadata to documents consumed from this rule "
                     "automatically. If you do not assign tags, types or "
                     "correspondents here, paperless will still process all "
-                    "matching rules that you have defined."
+                    "matching rules that you have defined.",
                 ),
                 "fields": (
                     "assign_title_from",
@ -1,5 +1,4 @@
 from django.apps import AppConfig
-
 from django.utils.translation import gettext_lazy as _

@ -1,6 +1,7 @@
 import os
 import tempfile
-from datetime import timedelta, date
+from datetime import date
+from datetime import timedelta
 from fnmatch import fnmatch

 import magic

@ -8,18 +9,16 @@ import pathvalidate
 from django.conf import settings
 from django.db import DatabaseError
 from django_q.tasks import async_task
-from imap_tools import (
-    MailBox,
-    MailBoxUnencrypted,
-    AND,
-    MailMessageFlags,
-    MailboxFolderSelectError,
-)
-
 from documents.loggers import LoggingMixin
 from documents.models import Correspondent
 from documents.parsers import is_mime_type_supported
-from paperless_mail.models import MailAccount, MailRule
+from imap_tools import AND
+from imap_tools import MailBox
+from imap_tools import MailboxFolderSelectError
+from imap_tools import MailBoxUnencrypted
+from imap_tools import MailMessageFlags
+from paperless_mail.models import MailAccount
+from paperless_mail.models import MailRule


 class MailError(Exception):

@ -120,8 +119,8 @@ class MailAccountHandler(LoggingMixin):
         else:
             raise NotImplementedError(
-                "Unknown title selector."
-            )  # pragma: nocover  # NOQA: E501
+                "Unknown title selector.",
+            )  # pragma: nocover

     def get_correspondent(self, message, rule):
         c_from = rule.assign_correspondent_from

@ -137,7 +136,7 @@ class MailAccountHandler(LoggingMixin):
                 message.from_values
                 and "name" in message.from_values
                 and message.from_values["name"]
-            ):  # NOQA: E501
+            ):
                 return self._correspondent_from_name(message.from_values["name"])
             else:
                 return self._correspondent_from_name(message.from_)

@ -147,8 +146,8 @@ class MailAccountHandler(LoggingMixin):
         else:
             raise NotImplementedError(
-                "Unknwown correspondent selector"
-            )  # pragma: nocover  # NOQA: E501
+                "Unknwown correspondent selector",
+            )  # pragma: nocover

     def handle_mail_account(self, account):

@ -159,7 +158,9 @@ class MailAccountHandler(LoggingMixin):
         total_processed_files = 0

         with get_mailbox(
-            account.imap_server, account.imap_port, account.imap_security
+            account.imap_server,
+            account.imap_port,
+            account.imap_security,
         ) as M:

             try:

@ -193,7 +194,7 @@ class MailAccountHandler(LoggingMixin):
         except MailboxFolderSelectError:
             raise MailError(
                 f"Rule {rule}: Folder {rule.folder} "
-                f"does not exist in account {rule.account}"
+                f"does not exist in account {rule.account}",
             )

         criterias = make_criterias(rule)

@ -242,12 +243,14 @@ class MailAccountHandler(LoggingMixin):

         try:
             get_rule_action(rule).post_consume(
-                M, post_consume_messages, rule.action_parameter
+                M,
+                post_consume_messages,
+                rule.action_parameter,
             )

         except Exception as e:
             raise MailError(
-                f"Rule {rule}: Error while processing post-consume actions: " f"{e}"
+                f"Rule {rule}: Error while processing post-consume actions: " f"{e}",
             )

         return total_processed_files

@ -274,7 +277,7 @@ class MailAccountHandler(LoggingMixin):
             if (
                 not att.content_disposition == "attachment"
                 and rule.attachment_type == MailRule.ATTACHMENT_TYPE_ATTACHMENTS_ONLY
-            ):  # NOQA: E501
+            ):
                 self.log(
                     "debug",
                     f"Rule {rule}: "

@ -297,7 +300,8 @@ class MailAccountHandler(LoggingMixin):

                 os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
                 _, temp_filename = tempfile.mkstemp(
-                    prefix="paperless-mail-", dir=settings.SCRATCH_DIR
+                    prefix="paperless-mail-",
+                    dir=settings.SCRATCH_DIR,
                 )
                 with open(temp_filename, "wb") as f:
                     f.write(att.payload)

@ -313,15 +317,13 @@ class MailAccountHandler(LoggingMixin):
                     "documents.tasks.consume_file",
                     path=temp_filename,
                     override_filename=pathvalidate.sanitize_filename(
-                        att.filename
-                    ),  # NOQA: E501
+                        att.filename,
+                    ),
                     override_title=title,
                     override_correspondent_id=correspondent.id
                     if correspondent
-                    else None,  # NOQA: E501
-                    override_document_type_id=doc_type.id
-                    if doc_type
-                    else None,  # NOQA: E501
+                    else None,
+                    override_document_type_id=doc_type.id if doc_type else None,
                     override_tag_ids=[tag.id] if tag else None,
                     task_name=att.filename[:100],
                 )
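A side effect of the one-argument-per-line style is that the `# NOQA: E501` suppressions that were papering over long lines are no longer needed and get dropped, while a conditional expression that now fits on one line is collapsed back. A small sketch with placeholder objects standing in for the real ORM instances:

    class FakeDocType:  # placeholder, only to make the sketch self-contained
        id = 7

    doc_type = FakeDocType()

    # before: spread over several lines and carrying a line-length suppression
    override_document_type_id = (
        doc_type.id
        if doc_type
        else None
    )

    # after: short enough for a single line, no suppression comment required
    override_document_type_id = doc_type.id if doc_type else None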
@ -1,5 +1,4 @@
 from django.core.management.base import BaseCommand
-
 from paperless_mail import tasks


@ -7,7 +6,8 @@ class Command(BaseCommand):

     help = """
     """.replace(
-        "    ", ""
+        "    ",
+        "",
     )

     def handle(self, *args, **options):
@ -1,7 +1,5 @@
-from django.db import models
-
 import documents.models as document_models
-
+from django.db import models
 from django.utils.translation import gettext_lazy as _


@ -30,12 +28,14 @@ class MailAccount(models.Model):
         null=True,
         help_text=_(
             "This is usually 143 for unencrypted and STARTTLS "
-            "connections, and 993 for SSL connections."
+            "connections, and 993 for SSL connections.",
         ),
     )

     imap_security = models.PositiveIntegerField(
-        _("IMAP security"), choices=IMAP_SECURITY_OPTIONS, default=IMAP_SECURITY_SSL
+        _("IMAP security"),
+        choices=IMAP_SECURITY_OPTIONS,
+        default=IMAP_SECURITY_SSL,
     )

     username = models.CharField(_("username"), max_length=256)

@ -48,7 +48,7 @@ class MailAccount(models.Model):
         default="UTF-8",
         help_text=_(
             "The character set to use when communicating with the "
-            "mail server, such as 'UTF-8' or 'US-ASCII'."
+            "mail server, such as 'UTF-8' or 'US-ASCII'.",
         ),
     )

@ -123,13 +123,22 @@ class MailRule(models.Model):
     )

     filter_from = models.CharField(
-        _("filter from"), max_length=256, null=True, blank=True
+        _("filter from"),
+        max_length=256,
+        null=True,
+        blank=True,
     )
     filter_subject = models.CharField(
-        _("filter subject"), max_length=256, null=True, blank=True
+        _("filter subject"),
+        max_length=256,
+        null=True,
+        blank=True,
     )
     filter_body = models.CharField(
-        _("filter body"), max_length=256, null=True, blank=True
+        _("filter body"),
+        max_length=256,
+        null=True,
+        blank=True,
     )

     filter_attachment_filename = models.CharField(

@ -140,12 +149,14 @@ class MailRule(models.Model):
         help_text=_(
             "Only consume documents which entirely match this "
             "filename if specified. Wildcards such as *.pdf or "
-            "*invoice* are allowed. Case insensitive."
+            "*invoice* are allowed. Case insensitive.",
         ),
     )

     maximum_age = models.PositiveIntegerField(
-        _("maximum age"), default=30, help_text=_("Specified in days.")
+        _("maximum age"),
+        default=30,
+        help_text=_("Specified in days."),
     )

     attachment_type = models.PositiveIntegerField(

@ -154,7 +165,7 @@ class MailRule(models.Model):
         default=ATTACHMENT_TYPE_ATTACHMENTS_ONLY,
         help_text=_(
             "Inline attachments include embedded images, so it's best "
-            "to combine this option with a filename filter."
+            "to combine this option with a filename filter.",
         ),
     )

@ -173,12 +184,14 @@ class MailRule(models.Model):
             "Additional parameter for the action selected above, "
             "i.e., "
             "the target folder of the move to folder action. "
-            "Subfolders must be separated by dots."
+            "Subfolders must be separated by dots.",
         ),
     )

     assign_title_from = models.PositiveIntegerField(
-        _("assign title from"), choices=TITLE_SELECTOR, default=TITLE_FROM_SUBJECT
+        _("assign title from"),
+        choices=TITLE_SELECTOR,
+        default=TITLE_FROM_SUBJECT,
     )

     assign_tag = models.ForeignKey(
@ -1,6 +1,7 @@
 import logging

-from paperless_mail.mail import MailAccountHandler, MailError
+from paperless_mail.mail import MailAccountHandler
+from paperless_mail.mail import MailError
 from paperless_mail.models import MailAccount

@@ -7,13 +7,15 @@ from unittest import mock
 from django.core.management import call_command
 from django.db import DatabaseError
 from django.test import TestCase
-from imap_tools import MailMessageFlags, MailboxFolderSelectError
-
 from documents.models import Correspondent
 from documents.tests.utils import DirectoriesMixin
+from imap_tools import MailboxFolderSelectError
+from imap_tools import MailMessageFlags
 from paperless_mail import tasks
-from paperless_mail.mail import MailError, MailAccountHandler
-from paperless_mail.models import MailRule, MailAccount
+from paperless_mail.mail import MailAccountHandler
+from paperless_mail.mail import MailError
+from paperless_mail.models import MailAccount
+from paperless_mail.models import MailRule
 
 
 class BogusFolderManager:
@@ -83,7 +85,7 @@ class BogusMailBox(ContextManager):
     def move(self, uid_list, folder):
         if folder == "spam":
             self.messages_spam.append(
-                filter(lambda m: m.uid in uid_list, self.messages)
+                filter(lambda m: m.uid in uid_list, self.messages),
             )
             self.messages = list(filter(lambda m: m.uid not in uid_list, self.messages))
         else:
@@ -115,7 +117,9 @@ def create_message(
 
 
 def create_attachment(
-    filename="the_file.pdf", content_disposition="attachment", payload=b"a PDF document"
+    filename="the_file.pdf",
+    content_disposition="attachment",
+    payload=b"a PDF document",
 ):
     attachment = namedtuple("Attachment", [])
     attachment.filename = filename
@@ -163,7 +167,7 @@ class TestMail(DirectoriesMixin, TestCase):
                 body="cables",
                 seen=True,
                 flagged=False,
-            )
+            ),
         )
         self.bogus_mailbox.messages.append(
             create_message(
@@ -171,14 +175,14 @@ class TestMail(DirectoriesMixin, TestCase):
                 body="from my favorite electronic store",
                 seen=False,
                 flagged=True,
-            )
+            ),
         )
         self.bogus_mailbox.messages.append(
             create_message(
                 subject="Claim your $10M price now!",
                 from_="amazon@amazon-some-indian-site.org",
                 seen=False,
-            )
+            ),
         )
 
     def test_get_correspondent(self):
@@ -196,12 +200,14 @@ class TestMail(DirectoriesMixin, TestCase):
         handler = MailAccountHandler()
 
         rule = MailRule(
-            name="a", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING
+            name="a",
+            assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NOTHING,
         )
         self.assertIsNone(handler.get_correspondent(message, rule))
 
         rule = MailRule(
-            name="b", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL
+            name="b",
+            assign_correspondent_from=MailRule.CORRESPONDENT_FROM_EMAIL,
         )
         c = handler.get_correspondent(message, rule)
         self.assertIsNotNone(c)
@@ -212,7 +218,8 @@ class TestMail(DirectoriesMixin, TestCase):
         self.assertEqual(c.id, me_localhost.id)
 
         rule = MailRule(
-            name="c", assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME
+            name="c",
+            assign_correspondent_from=MailRule.CORRESPONDENT_FROM_NAME,
         )
         c = handler.get_correspondent(message, rule)
         self.assertIsNotNone(c)
@@ -244,7 +251,9 @@ class TestMail(DirectoriesMixin, TestCase):
 
     def test_handle_message(self):
         message = create_message(
-            subject="the message title", from_="Myself", num_attachments=2
+            subject="the message title",
+            from_="Myself",
+            num_attachments=2,
         )
 
         account = MailAccount()
@@ -376,11 +385,16 @@ class TestMail(DirectoriesMixin, TestCase):
     def test_handle_mail_account_mark_read(self):
 
         account = MailAccount.objects.create(
-            name="test", imap_server="", username="admin", password="secret"
+            name="test",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
 
         rule = MailRule.objects.create(
-            name="testrule", account=account, action=MailRule.ACTION_MARK_READ
+            name="testrule",
+            account=account,
+            action=MailRule.ACTION_MARK_READ,
         )
 
         self.assertEqual(len(self.bogus_mailbox.messages), 3)
@@ -394,7 +408,10 @@ class TestMail(DirectoriesMixin, TestCase):
     def test_handle_mail_account_delete(self):
 
         account = MailAccount.objects.create(
-            name="test", imap_server="", username="admin", password="secret"
+            name="test",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
 
         rule = MailRule.objects.create(
@@ -412,7 +429,10 @@ class TestMail(DirectoriesMixin, TestCase):
 
     def test_handle_mail_account_flag(self):
         account = MailAccount.objects.create(
-            name="test", imap_server="", username="admin", password="secret"
+            name="test",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
 
         rule = MailRule.objects.create(
@@ -432,7 +452,10 @@ class TestMail(DirectoriesMixin, TestCase):
 
     def test_handle_mail_account_move(self):
         account = MailAccount.objects.create(
-            name="test", imap_server="", username="admin", password="secret"
+            name="test",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
 
         rule = MailRule.objects.create(
@@ -453,7 +476,10 @@ class TestMail(DirectoriesMixin, TestCase):
 
     def test_error_login(self):
         account = MailAccount.objects.create(
-            name="test", imap_server="", username="admin", password="wrong"
+            name="test",
+            imap_server="",
+            username="admin",
+            password="wrong",
         )
 
         try:
@@ -465,11 +491,17 @@ class TestMail(DirectoriesMixin, TestCase):
 
     def test_error_skip_account(self):
         account_faulty = MailAccount.objects.create(
-            name="test", imap_server="", username="admin", password="wroasdng"
+            name="test",
+            imap_server="",
+            username="admin",
+            password="wroasdng",
         )
 
         account = MailAccount.objects.create(
-            name="test2", imap_server="", username="admin", password="secret"
+            name="test2",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
         rule = MailRule.objects.create(
             name="testrule",
@@ -487,7 +519,10 @@ class TestMail(DirectoriesMixin, TestCase):
     def test_error_skip_rule(self):
 
         account = MailAccount.objects.create(
-            name="test2", imap_server="", username="admin", password="secret"
+            name="test2",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
         rule = MailRule.objects.create(
             name="testrule",
@@ -523,7 +558,10 @@ class TestMail(DirectoriesMixin, TestCase):
         m.side_effect = get_correspondent_fake
 
         account = MailAccount.objects.create(
-            name="test2", imap_server="", username="admin", password="secret"
+            name="test2",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
         rule = MailRule.objects.create(
             name="testrule",
@@ -544,7 +582,10 @@ class TestMail(DirectoriesMixin, TestCase):
     def test_error_create_correspondent(self):
 
         account = MailAccount.objects.create(
-            name="test2", imap_server="", username="admin", password="secret"
+            name="test2",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
         rule = MailRule.objects.create(
             name="testrule",
@@ -579,7 +620,10 @@ class TestMail(DirectoriesMixin, TestCase):
     def test_filters(self):
 
         account = MailAccount.objects.create(
-            name="test3", imap_server="", username="admin", password="secret"
+            name="test3",
+            imap_server="",
+            username="admin",
+            password="secret",
         )
         rule = MailRule.objects.create(
             name="testrule3",
@@ -629,7 +673,7 @@ class TestMail(DirectoriesMixin, TestCase):
 
 class TestManagementCommand(TestCase):
     @mock.patch(
-        "paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts"
+        "paperless_mail.management.commands.mail_fetcher.tasks.process_mail_accounts",
     )
     def test_mail_fetcher(self, m):
 
@@ -644,10 +688,16 @@ class TestTasks(TestCase):
         m.side_effect = lambda account: 6
 
         MailAccount.objects.create(
-            name="A", imap_server="A", username="A", password="A"
+            name="A",
+            imap_server="A",
+            username="A",
+            password="A",
         )
         MailAccount.objects.create(
-            name="B", imap_server="A", username="A", password="A"
+            name="B",
+            imap_server="A",
+            username="A",
+            password="A",
         )
 
         result = tasks.process_mail_accounts()
@@ -663,7 +713,10 @@ class TestTasks(TestCase):
     def test_single_accounts(self, m):
 
         MailAccount.objects.create(
-            name="A", imap_server="A", username="A", password="A"
+            name="A",
+            imap_server="A",
+            username="A",
+            password="A",
         )
 
         tasks.process_mail_account("A")
@@ -1,2 +1,5 @@
 # this is here so that django finds the checks.
-from .checks import *
+from .checks import check_default_language_available
+from .checks import get_tesseract_langs
+
+__all__ = ["get_tesseract_langs", "check_default_language_available"]
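Note: the hunk above swaps a wildcard re-export for explicit imports plus an __all__ list, which keeps the package's public surface the same while making it obvious, to readers and to a linter's unused-import check, that the names are deliberate re-exports. A self-contained sketch of what __all__ controls, using made-up names:

    # everything collapsed into one module for the sketch
    def language_check():
        return "ok"

    def extra_helper():
        return "not meant to be public"

    # only the names listed here are picked up by "from <module> import *";
    # anything else stays importable by name but out of the wildcard surface
    __all__ = ["language_check"]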
@@ -1,5 +1,4 @@
 from django.apps import AppConfig
-
 from paperless_tesseract.signals import tesseract_consumer_declaration
 
 
@@ -1,7 +1,9 @@
 import subprocess
 
 from django.conf import settings
-from django.core.checks import Error, Warning, register
+from django.core.checks import Error
+from django.core.checks import register
+from django.core.checks import Warning
 
 
 def get_tesseract_langs():
@@ -19,8 +21,8 @@ def check_default_language_available(app_configs, **kwargs):
         return [
             Warning(
                 "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE. "
-                "This means that tesseract will fallback to english."
-            )
+                "This means that tesseract will fallback to english.",
+            ),
         ]
 
     specified_langs = settings.OCR_LANGUAGE.split("+")
@@ -31,8 +33,8 @@ def check_default_language_available(app_configs, **kwargs):
                 Error(
                     f"The selected ocr language {lang} is "
                     f"not installed. Paperless cannot OCR your documents "
-                    f"without it. Please fix PAPERLESS_OCR_LANGUAGE."
-                )
+                    f"without it. Please fix PAPERLESS_OCR_LANGUAGE.",
+                ),
             ]
 
     return []
@@ -2,10 +2,11 @@ import json
 import os
 import re
 
-from PIL import Image
 from django.conf import settings
-from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf
+from documents.parsers import DocumentParser
+from documents.parsers import make_thumbnail_from_pdf
+from documents.parsers import ParseError
+from PIL import Image
 
 
 class NoTextFoundException(Exception):
@@ -42,7 +43,7 @@ class RasterisedDocumentParser(DocumentParser):
                             "prefix": meta.REVERSE_NS[m.group(1)],
                             "key": m.group(2),
                             "value": value,
-                        }
+                        },
                     )
                 except Exception as e:
                     self.log(
@@ -53,7 +54,9 @@ class RasterisedDocumentParser(DocumentParser):
 
     def get_thumbnail(self, document_path, mime_type, file_name=None):
         return make_thumbnail_from_pdf(
-            self.archive_path or document_path, self.tempdir, self.logging_group
+            self.archive_path or document_path,
+            self.tempdir,
+            self.logging_group,
         )
 
     def is_image(self, mime_type):
@@ -110,7 +113,6 @@ class RasterisedDocumentParser(DocumentParser):
             return None
 
         from pdfminer.high_level import extract_text as pdfminer_extract_text
-        from pdfminer.pdftypes import PDFException
 
         try:
             stripped = post_process_text(pdfminer_extract_text(pdf_file))
@@ -129,7 +131,12 @@ class RasterisedDocumentParser(DocumentParser):
             return None
 
     def construct_ocrmypdf_parameters(
-        self, input_file, mime_type, output_file, sidecar_file, safe_fallback=False
+        self,
+        input_file,
+        mime_type,
+        output_file,
+        sidecar_file,
+        safe_fallback=False,
     ):
         ocrmypdf_args = {
             "input_file": input_file,
@@ -167,7 +174,7 @@ class RasterisedDocumentParser(DocumentParser):
             ocrmypdf_args["rotate_pages"] = True
             ocrmypdf_args[
                 "rotate_pages_threshold"
-            ] = settings.OCR_ROTATE_PAGES_THRESHOLD  # NOQA: E501
+            ] = settings.OCR_ROTATE_PAGES_THRESHOLD
 
         if settings.OCR_PAGES > 0:
             ocrmypdf_args["pages"] = f"1-{settings.OCR_PAGES}"
@@ -202,7 +209,7 @@ class RasterisedDocumentParser(DocumentParser):
                 raise ParseError(
                     f"Cannot produce archive PDF for image {input_file}, "
                     f"no DPI information is present in this image and "
-                    f"OCR_IMAGE_DPI is not set."
+                    f"OCR_IMAGE_DPI is not set.",
                 )
 
         if settings.OCR_USER_ARGS and not safe_fallback:
@@ -241,7 +248,10 @@ class RasterisedDocumentParser(DocumentParser):
         sidecar_file = os.path.join(self.tempdir, "sidecar.txt")
 
         args = self.construct_ocrmypdf_parameters(
-            document_path, mime_type, archive_path, sidecar_file
+            document_path,
+            mime_type,
+            archive_path,
+            sidecar_file,
         )
 
         try:
@@ -289,7 +299,8 @@ class RasterisedDocumentParser(DocumentParser):
                 # is bigger and blurry due to --force-ocr.
 
                 self.text = self.extract_text(
-                    sidecar_file_fallback, archive_path_fallback
+                    sidecar_file_fallback,
+                    archive_path_fallback,
                 )
 
             except Exception as e:
@@ -1,8 +1,8 @@
 from unittest import mock
 
 from django.core.checks import ERROR
-from django.test import TestCase, override_settings
+from django.test import override_settings
+from django.test import TestCase
 from paperless_tesseract import check_default_language_available
 
 
@@ -16,8 +16,8 @@ class TestChecks(TestCase):
         self.assertEqual(len(msgs), 1)
         self.assertTrue(
             msgs[0].msg.startswith(
-                "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE"
-            )
+                "No OCR language has been specified with PAPERLESS_OCR_LANGUAGE",
+            ),
         )
 
     @override_settings(OCR_LANGUAGE="ita")
@@ -3,11 +3,13 @@ import uuid
 from typing import ContextManager
 from unittest import mock
 
-from django.test import TestCase, override_settings
-from documents.parsers import ParseError, run_convert
+from django.test import override_settings
+from django.test import TestCase
+from documents.parsers import ParseError
+from documents.parsers import run_convert
 from documents.tests.utils import DirectoriesMixin
-from paperless_tesseract.parsers import RasterisedDocumentParser, post_process_text
+from paperless_tesseract.parsers import post_process_text
+from paperless_tesseract.parsers import RasterisedDocumentParser
 
 image_to_string_calls = []
 
@@ -56,7 +58,9 @@ class TestParser(DirectoriesMixin, TestCase):
                 result,
                 actual_result,
                 "strip_exceess_whitespace({}) != '{}', but '{}'".format(
-                    source, result, actual_result
+                    source,
+                    result,
+                    actual_result,
                 ),
             )
 
@@ -65,7 +69,8 @@ class TestParser(DirectoriesMixin, TestCase):
     def test_get_text_from_pdf(self):
         parser = RasterisedDocumentParser(uuid.uuid4())
         text = parser.extract_text(
-            None, os.path.join(self.SAMPLE_FILES, "simple-digital.pdf")
+            None,
+            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
         )
 
         self.assertContainsStrings(text.strip(), ["This is a test document."])
@@ -73,7 +78,8 @@ class TestParser(DirectoriesMixin, TestCase):
     def test_thumbnail(self):
         parser = RasterisedDocumentParser(uuid.uuid4())
         thumb = parser.get_thumbnail(
-            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(thumb))
 
@@ -89,14 +95,16 @@ class TestParser(DirectoriesMixin, TestCase):
 
         parser = RasterisedDocumentParser(uuid.uuid4())
         thumb = parser.get_thumbnail(
-            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
+            "application/pdf",
        )
         self.assertTrue(os.path.isfile(thumb))
 
     def test_thumbnail_encrypted(self):
         parser = RasterisedDocumentParser(uuid.uuid4())
         thumb = parser.get_thumbnail(
-            os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "encrypted.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(thumb))
 
@@ -113,7 +121,8 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(None)
 
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "simple-digital.pdf"),
+            "application/pdf",
         )
 
         self.assertTrue(os.path.isfile(parser.archive_path))
@@ -124,7 +133,8 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(None)
 
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
+            "application/pdf",
         )
 
         self.assertTrue(os.path.isfile(parser.archive_path))
@@ -139,7 +149,8 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(None)
 
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
+            "application/pdf",
         )
 
         self.assertIsNone(parser.archive_path)
@@ -168,7 +179,8 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(None)
 
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "encrypted.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "encrypted.pdf"),
+            "application/pdf",
         )
 
         self.assertIsNone(parser.archive_path)
@@ -178,7 +190,8 @@ class TestParser(DirectoriesMixin, TestCase):
     def test_with_form_error_notext(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
+            "application/pdf",
         )
 
         self.assertContainsStrings(
@@ -191,7 +204,8 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(None)
 
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "with-form.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "with-form.pdf"),
+            "application/pdf",
         )
 
         self.assertContainsStrings(
@@ -221,7 +235,7 @@ class TestParser(DirectoriesMixin, TestCase):
         parser = RasterisedDocumentParser(None)
 
         dpi = parser.calculate_a4_dpi(
-            os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png")
+            os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"),
        )
 
         self.assertEqual(dpi, 62)
@@ -233,7 +247,8 @@ class TestParser(DirectoriesMixin, TestCase):
 
         def f():
             parser.parse(
-                os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"), "image/png"
+                os.path.join(self.SAMPLE_FILES, "simple-no-dpi.png"),
+                "image/png",
             )
 
         self.assertRaises(ParseError, f)
@@ -247,68 +262,80 @@ class TestParser(DirectoriesMixin, TestCase):
         self.assertTrue(os.path.isfile(parser.archive_path))
 
         self.assertContainsStrings(
-            parser.get_text().lower(), ["this is a test document."]
+            parser.get_text().lower(),
+            ["this is a test document."],
         )
 
     def test_multi_page(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
        )
 
     @override_settings(OCR_PAGES=2, OCR_MODE="skip")
     def test_multi_page_pages_skip(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )
 
     @override_settings(OCR_PAGES=2, OCR_MODE="redo")
     def test_multi_page_pages_redo(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            "application/pdf",
        )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )
 
     @override_settings(OCR_PAGES=2, OCR_MODE="force")
     def test_multi_page_pages_force(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )
 
     @override_settings(OOCR_MODE="skip")
     def test_multi_page_analog_pages_skip(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )
 
     @override_settings(OCR_PAGES=2, OCR_MODE="redo")
     def test_multi_page_analog_pages_redo(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(parser.get_text().lower(), ["page 1", "page 2"])
@@ -318,7 +345,8 @@ class TestParser(DirectoriesMixin, TestCase):
     def test_multi_page_analog_pages_force(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(parser.get_text().lower(), ["page 1"])
@@ -329,29 +357,34 @@ class TestParser(DirectoriesMixin, TestCase):
     def test_skip_noarchive_withtext(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-digital.pdf"),
+            "application/pdf",
         )
         self.assertIsNone(parser.archive_path)
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )
 
     @override_settings(OCR_MODE="skip_noarchive")
     def test_skip_noarchive_notext(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-images.pdf"),
+            "application/pdf",
        )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 1", "page 2", "page 3"]
+            parser.get_text().lower(),
+            ["page 1", "page 2", "page 3"],
         )
 
     @override_settings(OCR_MODE="skip")
     def test_multi_page_mixed(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
+            "application/pdf",
         )
         self.assertTrue(os.path.isfile(parser.archive_path))
         self.assertContainsStrings(
@@ -368,11 +401,13 @@ class TestParser(DirectoriesMixin, TestCase):
     def test_multi_page_mixed_no_archive(self):
         parser = RasterisedDocumentParser(None)
         parser.parse(
-            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"), "application/pdf"
+            os.path.join(self.SAMPLE_FILES, "multi-page-mixed.pdf"),
+            "application/pdf",
         )
         self.assertIsNone(parser.archive_path)
         self.assertContainsStrings(
-            parser.get_text().lower(), ["page 4", "page 5", "page 6"]
+            parser.get_text().lower(),
+            ["page 4", "page 5", "page 6"],
         )
 
     @override_settings(OCR_MODE="skip", OCR_ROTATE_PAGES=True)
@@ -1,5 +1,4 @@
 from django.apps import AppConfig
-
 from paperless_text.signals import text_consumer_declaration
 
 
@@ -1,9 +1,10 @@
import os

-from PIL import ImageDraw, ImageFont, Image
from django.conf import settings
-
from documents.parsers import DocumentParser
+from PIL import Image
+from PIL import ImageDraw
+from PIL import ImageFont


class TextDocumentParser(DocumentParser):
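The import hunks here and in the Tika parser below converge on one convention: a single imported name per from-import, imports sorted within their block, and stray blank lines inside a block removed. This looks like the output of a reorder-python-imports style hook, which is an assumption since the diff only shows the result. A stdlib-only sketch of the before/after:

# Before: several names packed onto one from-import.
#
#     from collections import OrderedDict, defaultdict
#
# After: one name per line, sorted, so a later added or removed import
# changes exactly one line.
import os

from collections import defaultdict
from collections import OrderedDict

# Trivial usage so the sketch runs and the imports are exercised.
counts = defaultdict(int)
counts[os.name] += 1
print(OrderedDict(sorted(counts.items())))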
@@ -1,7 +1,6 @@
import os

from django.test import TestCase
-
from documents.tests.utils import DirectoriesMixin
from paperless_text.parsers import TextDocumentParser

@@ -13,7 +12,8 @@ class TestTextParser(DirectoriesMixin, TestCase):

        # just make sure that it does not crash
        f = parser.get_thumbnail(
-            os.path.join(os.path.dirname(__file__), "samples", "test.txt"), "text/plain"
+            os.path.join(os.path.dirname(__file__), "samples", "test.txt"),
+            "text/plain",
        )
        self.assertTrue(os.path.isfile(f))

@@ -22,7 +22,8 @@ class TestTextParser(DirectoriesMixin, TestCase):
        parser = TextDocumentParser(None)

        parser.parse(
-            os.path.join(os.path.dirname(__file__), "samples", "test.txt"), "text/plain"
+            os.path.join(os.path.dirname(__file__), "samples", "test.txt"),
+            "text/plain",
        )

        self.assertEqual(parser.get_text(), "This is a test file.\n")
@@ -1,10 +1,11 @@
import os
-import requests

import dateutil.parser
+import requests
from django.conf import settings
-
-from documents.parsers import DocumentParser, ParseError, make_thumbnail_from_pdf
+from documents.parsers import DocumentParser
+from documents.parsers import make_thumbnail_from_pdf
+from documents.parsers import ParseError
from tika import parser

@@ -20,7 +21,9 @@ class TikaDocumentParser(DocumentParser):
            self.archive_path = self.convert_to_pdf(document_path, file_name)

        return make_thumbnail_from_pdf(
-            self.archive_path, self.tempdir, self.logging_group
+            self.archive_path,
+            self.tempdir,
+            self.logging_group,
        )

    def extract_metadata(self, document_path, mime_type):
@@ -53,7 +56,7 @@ class TikaDocumentParser(DocumentParser):
        except Exception as err:
            raise ParseError(
                f"Could not parse {document_path} with tika server at "
-                f"{tika_server}: {err}"
+                f"{tika_server}: {err}",
            )

        self.text = parsed["content"].strip()
@@ -74,11 +77,12 @@ class TikaDocumentParser(DocumentParser):
        url = gotenberg_server + "/forms/libreoffice/convert"

        self.log("info", f"Converting {document_path} to PDF as {pdf_path}")
+        with open(document_path, "rb") as document_handle:
            files = {
                "files": (
                    file_name or os.path.basename(document_path),
-                open(document_path, "rb"),
-            )
+                    document_handle,
+                ),
            }
            headers = {}

@@ -88,7 +92,7 @@ class TikaDocumentParser(DocumentParser):
            except Exception as err:
                raise ParseError(f"Error while converting document to PDF: {err}")

-        file = open(pdf_path, "wb")
+        with open(pdf_path, "wb") as file:
            file.write(response.content)
            file.close()

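Both hunks above replace a bare open() with a with block, so the file handle is released even if the upload or the write raises. A stdlib-only sketch of the same pattern, assuming a temporary path and placeholder bytes in place of the Gotenberg response:

import os
import tempfile

pdf_path = os.path.join(tempfile.mkdtemp(), "convert.pdf")
payload = b"%PDF-1.4\n% placeholder bytes standing in for response.content\n"

# Old pattern: the handle stays open if write() raises before close() runs.
#
#     file = open(pdf_path, "wb")
#     file.write(payload)
#     file.close()

# New pattern: the context manager closes the handle on success and on error.
with open(pdf_path, "wb") as file:
    file.write(payload)

print(pdf_path, os.path.getsize(pdf_path))

The explicit file.close() kept inside the with block in the diff is redundant but harmless; closing an already managed handle is a no-op once the block exits.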
@@ -10,12 +10,12 @@ def tika_consumer_declaration(sender, **kwargs):
        "weight": 10,
        "mime_types": {
            "application/msword": ".doc",
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",  # NOQA: E501
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",  # noqa: E501
            "application/vnd.ms-excel": ".xls",
-            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",  # NOQA: E501
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",  # noqa: E501
            "application/vnd.ms-powerpoint": ".ppt",
-            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",  # NOQA: E501
-            "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",  # NOQA: E501
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",  # noqa: E501
+            "application/vnd.openxmlformats-officedocument.presentationml.slideshow": ".ppsx",  # noqa: E501
            "application/vnd.oasis.opendocument.presentation": ".odp",
            "application/vnd.oasis.opendocument.spreadsheet": ".ods",
            "application/vnd.oasis.opendocument.text": ".odt",
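The only change in the hunk above is the casing of the suppression comment. flake8 matches the marker case-insensitively, so "# NOQA: E501" and "# noqa: E501" behave the same; the lowercase form is simply the spelling being standardised on. A small self-contained illustration, with a trimmed stand-in for the mime-type map above:

# The key below is far longer than the configured line-length limit, so the
# E501 (line too long) check is silenced for this one line only.
MIME_SUFFIXES = {
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",  # noqa: E501
}

for mime_type, suffix in MIME_SUFFIXES.items():
    print(mime_type, "->", suffix)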
@@ -4,9 +4,8 @@ from pathlib import Path
from unittest import mock

from django.test import TestCase
-from requests import Response
-
from paperless_tika.parsers import TikaDocumentParser
+from requests import Response


class TestTikaParser(TestCase):
@@ -42,14 +41,15 @@ class TestTikaParser(TestCase):
    @mock.patch("paperless_tika.parsers.parser.from_file")
    def test_metadata(self, from_file):
        from_file.return_value = {
-            "metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"}
+            "metadata": {"Creation-Date": "2020-11-21", "Some-key": "value"},
        }

        file = os.path.join(self.parser.tempdir, "input.odt")
        Path(file).touch()

        metadata = self.parser.extract_metadata(
-            file, "application/vnd.oasis.opendocument.text"
+            file,
+            "application/vnd.oasis.opendocument.text",
        )

        self.assertTrue("Creation-Date" in [m["key"] for m in metadata])