Format Python code with black
This commit is contained in:
parent 13885968e3
commit fc695896dd
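Every hunk below is a pure reformatting change produced by black: over-long calls are exploded onto one argument per line with trailing commas, aligned continuation lines become hanging indents, single quotes become double quotes, and blank lines inside brackets are dropped. As a minimal sketch of that transformation — assuming black's standard Python API and its default 88-character line length, since the exact version and command used for this commit are not shown on this page — the same reflow can be reproduced on a string of source code:

    import black

    # Hypothetical over-long call, not taken from the repository; it only
    # illustrates the kind of line that black splits in the hunks below.
    src = (
        'async_task("documents.tasks.bulk_update_documents", '
        "document_ids=affected_docs, extra_option_that_makes_this_too_long=True)\n"
    )

    # format_str() applies the same rules the black CLI applies to whole files.
    print(black.format_str(src, mode=black.Mode(line_length=88)))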
@ -1,39 +1,32 @@
|
|||||||
from django.contrib import admin
|
from django.contrib import admin
|
||||||
|
|
||||||
from .models import Correspondent, Document, DocumentType, Tag, \
|
from .models import (
|
||||||
SavedView, SavedViewFilterRule
|
Correspondent,
|
||||||
|
Document,
|
||||||
|
DocumentType,
|
||||||
|
Tag,
|
||||||
|
SavedView,
|
||||||
|
SavedViewFilterRule,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class CorrespondentAdmin(admin.ModelAdmin):
|
class CorrespondentAdmin(admin.ModelAdmin):
|
||||||
|
|
||||||
list_display = (
|
list_display = ("name", "match", "matching_algorithm")
|
||||||
"name",
|
|
||||||
"match",
|
|
||||||
"matching_algorithm"
|
|
||||||
)
|
|
||||||
list_filter = ("matching_algorithm",)
|
list_filter = ("matching_algorithm",)
|
||||||
list_editable = ("match", "matching_algorithm")
|
list_editable = ("match", "matching_algorithm")
|
||||||
|
|
||||||
|
|
||||||
class TagAdmin(admin.ModelAdmin):
|
class TagAdmin(admin.ModelAdmin):
|
||||||
|
|
||||||
list_display = (
|
list_display = ("name", "color", "match", "matching_algorithm")
|
||||||
"name",
|
|
||||||
"color",
|
|
||||||
"match",
|
|
||||||
"matching_algorithm"
|
|
||||||
)
|
|
||||||
list_filter = ("color", "matching_algorithm")
|
list_filter = ("color", "matching_algorithm")
|
||||||
list_editable = ("color", "match", "matching_algorithm")
|
list_editable = ("color", "match", "matching_algorithm")
|
||||||
|
|
||||||
|
|
||||||
class DocumentTypeAdmin(admin.ModelAdmin):
|
class DocumentTypeAdmin(admin.ModelAdmin):
|
||||||
|
|
||||||
list_display = (
|
list_display = ("name", "match", "matching_algorithm")
|
||||||
"name",
|
|
||||||
"match",
|
|
||||||
"matching_algorithm"
|
|
||||||
)
|
|
||||||
list_filter = ("matching_algorithm",)
|
list_filter = ("matching_algorithm",)
|
||||||
list_editable = ("match", "matching_algorithm")
|
list_editable = ("match", "matching_algorithm")
|
||||||
|
|
||||||
@ -49,18 +42,12 @@ class DocumentAdmin(admin.ModelAdmin):
|
|||||||
"filename",
|
"filename",
|
||||||
"checksum",
|
"checksum",
|
||||||
"archive_filename",
|
"archive_filename",
|
||||||
"archive_checksum"
|
"archive_checksum",
|
||||||
)
|
)
|
||||||
|
|
||||||
list_display_links = ("title",)
|
list_display_links = ("title",)
|
||||||
|
|
||||||
list_display = (
|
list_display = ("id", "title", "mime_type", "filename", "archive_filename")
|
||||||
"id",
|
|
||||||
"title",
|
|
||||||
"mime_type",
|
|
||||||
"filename",
|
|
||||||
"archive_filename"
|
|
||||||
)
|
|
||||||
|
|
||||||
list_filter = (
|
list_filter = (
|
||||||
("mime_type"),
|
("mime_type"),
|
||||||
@ -79,6 +66,7 @@ class DocumentAdmin(admin.ModelAdmin):
|
|||||||
|
|
||||||
def created_(self, obj):
|
def created_(self, obj):
|
||||||
return obj.created.date().strftime("%Y-%m-%d")
|
return obj.created.date().strftime("%Y-%m-%d")
|
||||||
|
|
||||||
created_.short_description = "Created"
|
created_.short_description = "Created"
|
||||||
|
|
||||||
def delete_queryset(self, request, queryset):
|
def delete_queryset(self, request, queryset):
|
||||||
@ -92,11 +80,13 @@ class DocumentAdmin(admin.ModelAdmin):
|
|||||||
|
|
||||||
def delete_model(self, request, obj):
|
def delete_model(self, request, obj):
|
||||||
from documents import index
|
from documents import index
|
||||||
|
|
||||||
index.remove_document_from_index(obj)
|
index.remove_document_from_index(obj)
|
||||||
super(DocumentAdmin, self).delete_model(request, obj)
|
super(DocumentAdmin, self).delete_model(request, obj)
|
||||||
|
|
||||||
def save_model(self, request, obj, form, change):
|
def save_model(self, request, obj, form, change):
|
||||||
from documents import index
|
from documents import index
|
||||||
|
|
||||||
index.add_or_update_document(obj)
|
index.add_or_update_document(obj)
|
||||||
super(DocumentAdmin, self).save_model(request, obj, form, change)
|
super(DocumentAdmin, self).save_model(request, obj, form, change)
|
||||||
|
|
||||||
@ -109,9 +99,7 @@ class SavedViewAdmin(admin.ModelAdmin):
|
|||||||
|
|
||||||
list_display = ("name", "user")
|
list_display = ("name", "user")
|
||||||
|
|
||||||
inlines = [
|
inlines = [RuleInline]
|
||||||
RuleInline
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
admin.site.register(Correspondent, CorrespondentAdmin)
|
admin.site.register(Correspondent, CorrespondentAdmin)
|
||||||
|
@ -17,7 +17,7 @@ class DocumentsConfig(AppConfig):
|
|||||||
set_correspondent,
|
set_correspondent,
|
||||||
set_document_type,
|
set_document_type,
|
||||||
set_tags,
|
set_tags,
|
||||||
add_to_index
|
add_to_index,
|
||||||
)
|
)
|
||||||
|
|
||||||
document_consumption_finished.connect(add_inbox_tags)
|
document_consumption_finished.connect(add_inbox_tags)
|
||||||
|
@ -4,14 +4,12 @@ from documents.models import Document
|
|||||||
|
|
||||||
|
|
||||||
class BulkArchiveStrategy:
|
class BulkArchiveStrategy:
|
||||||
|
|
||||||
def __init__(self, zipf: ZipFile):
|
def __init__(self, zipf: ZipFile):
|
||||||
self.zipf = zipf
|
self.zipf = zipf
|
||||||
|
|
||||||
def make_unique_filename(self,
|
def make_unique_filename(
|
||||||
doc: Document,
|
self, doc: Document, archive: bool = False, folder: str = ""
|
||||||
archive: bool = False,
|
):
|
||||||
folder: str = ""):
|
|
||||||
counter = 0
|
counter = 0
|
||||||
while True:
|
while True:
|
||||||
filename = folder + doc.get_public_filename(archive, counter)
|
filename = folder + doc.get_public_filename(archive, counter)
|
||||||
@ -25,36 +23,31 @@ class BulkArchiveStrategy:
|
|||||||
|
|
||||||
|
|
||||||
class OriginalsOnlyStrategy(BulkArchiveStrategy):
|
class OriginalsOnlyStrategy(BulkArchiveStrategy):
|
||||||
|
|
||||||
def add_document(self, doc: Document):
|
def add_document(self, doc: Document):
|
||||||
self.zipf.write(doc.source_path, self.make_unique_filename(doc))
|
self.zipf.write(doc.source_path, self.make_unique_filename(doc))
|
||||||
|
|
||||||
|
|
||||||
class ArchiveOnlyStrategy(BulkArchiveStrategy):
|
class ArchiveOnlyStrategy(BulkArchiveStrategy):
|
||||||
|
|
||||||
def __init__(self, zipf):
|
def __init__(self, zipf):
|
||||||
super(ArchiveOnlyStrategy, self).__init__(zipf)
|
super(ArchiveOnlyStrategy, self).__init__(zipf)
|
||||||
|
|
||||||
def add_document(self, doc: Document):
|
def add_document(self, doc: Document):
|
||||||
if doc.has_archive_version:
|
if doc.has_archive_version:
|
||||||
self.zipf.write(doc.archive_path,
|
self.zipf.write(
|
||||||
self.make_unique_filename(doc, archive=True))
|
doc.archive_path, self.make_unique_filename(doc, archive=True)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
self.zipf.write(doc.source_path,
|
self.zipf.write(doc.source_path, self.make_unique_filename(doc))
|
||||||
self.make_unique_filename(doc))
|
|
||||||
|
|
||||||
|
|
||||||
class OriginalAndArchiveStrategy(BulkArchiveStrategy):
|
class OriginalAndArchiveStrategy(BulkArchiveStrategy):
|
||||||
|
|
||||||
def add_document(self, doc: Document):
|
def add_document(self, doc: Document):
|
||||||
if doc.has_archive_version:
|
if doc.has_archive_version:
|
||||||
self.zipf.write(
|
self.zipf.write(
|
||||||
doc.archive_path, self.make_unique_filename(
|
doc.archive_path,
|
||||||
doc, archive=True, folder="archive/"
|
self.make_unique_filename(doc, archive=True, folder="archive/"),
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self.zipf.write(
|
self.zipf.write(
|
||||||
doc.source_path,
|
doc.source_path, self.make_unique_filename(doc, folder="originals/")
|
||||||
self.make_unique_filename(doc, folder="originals/")
|
|
||||||
)
|
)
|
||||||
|
@ -10,13 +10,11 @@ def set_correspondent(doc_ids, correspondent):
|
|||||||
if correspondent:
|
if correspondent:
|
||||||
correspondent = Correspondent.objects.get(id=correspondent)
|
correspondent = Correspondent.objects.get(id=correspondent)
|
||||||
|
|
||||||
qs = Document.objects.filter(
|
qs = Document.objects.filter(Q(id__in=doc_ids) & ~Q(correspondent=correspondent))
|
||||||
Q(id__in=doc_ids) & ~Q(correspondent=correspondent))
|
|
||||||
affected_docs = [doc.id for doc in qs]
|
affected_docs = [doc.id for doc in qs]
|
||||||
qs.update(correspondent=correspondent)
|
qs.update(correspondent=correspondent)
|
||||||
|
|
||||||
async_task(
|
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||||
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
|
||||||
|
|
||||||
return "OK"
|
return "OK"
|
||||||
|
|
||||||
@ -25,13 +23,11 @@ def set_document_type(doc_ids, document_type):
|
|||||||
if document_type:
|
if document_type:
|
||||||
document_type = DocumentType.objects.get(id=document_type)
|
document_type = DocumentType.objects.get(id=document_type)
|
||||||
|
|
||||||
qs = Document.objects.filter(
|
qs = Document.objects.filter(Q(id__in=doc_ids) & ~Q(document_type=document_type))
|
||||||
Q(id__in=doc_ids) & ~Q(document_type=document_type))
|
|
||||||
affected_docs = [doc.id for doc in qs]
|
affected_docs = [doc.id for doc in qs]
|
||||||
qs.update(document_type=document_type)
|
qs.update(document_type=document_type)
|
||||||
|
|
||||||
async_task(
|
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||||
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
|
||||||
|
|
||||||
return "OK"
|
return "OK"
|
||||||
|
|
||||||
@ -43,13 +39,11 @@ def add_tag(doc_ids, tag):
|
|||||||
|
|
||||||
DocumentTagRelationship = Document.tags.through
|
DocumentTagRelationship = Document.tags.through
|
||||||
|
|
||||||
DocumentTagRelationship.objects.bulk_create([
|
DocumentTagRelationship.objects.bulk_create(
|
||||||
DocumentTagRelationship(
|
[DocumentTagRelationship(document_id=doc, tag_id=tag) for doc in affected_docs]
|
||||||
document_id=doc, tag_id=tag) for doc in affected_docs
|
)
|
||||||
])
|
|
||||||
|
|
||||||
async_task(
|
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||||
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
|
||||||
|
|
||||||
return "OK"
|
return "OK"
|
||||||
|
|
||||||
@ -62,12 +56,10 @@ def remove_tag(doc_ids, tag):
|
|||||||
DocumentTagRelationship = Document.tags.through
|
DocumentTagRelationship = Document.tags.through
|
||||||
|
|
||||||
DocumentTagRelationship.objects.filter(
|
DocumentTagRelationship.objects.filter(
|
||||||
Q(document_id__in=affected_docs) &
|
Q(document_id__in=affected_docs) & Q(tag_id=tag)
|
||||||
Q(tag_id=tag)
|
|
||||||
).delete()
|
).delete()
|
||||||
|
|
||||||
async_task(
|
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||||
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
|
||||||
|
|
||||||
return "OK"
|
return "OK"
|
||||||
|
|
||||||
@ -83,13 +75,15 @@ def modify_tags(doc_ids, add_tags, remove_tags):
|
|||||||
tag_id__in=remove_tags,
|
tag_id__in=remove_tags,
|
||||||
).delete()
|
).delete()
|
||||||
|
|
||||||
DocumentTagRelationship.objects.bulk_create([DocumentTagRelationship(
|
DocumentTagRelationship.objects.bulk_create(
|
||||||
document_id=doc, tag_id=tag) for (doc, tag) in itertools.product(
|
[
|
||||||
affected_docs, add_tags)
|
DocumentTagRelationship(document_id=doc, tag_id=tag)
|
||||||
], ignore_conflicts=True)
|
for (doc, tag) in itertools.product(affected_docs, add_tags)
|
||||||
|
],
|
||||||
|
ignore_conflicts=True,
|
||||||
|
)
|
||||||
|
|
||||||
async_task(
|
async_task("documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
||||||
"documents.tasks.bulk_update_documents", document_ids=affected_docs)
|
|
||||||
|
|
||||||
return "OK"
|
return "OK"
|
||||||
|
|
||||||
|
@ -16,20 +16,25 @@ def changed_password_check(app_configs, **kwargs):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
encrypted_doc = Document.objects.filter(
|
encrypted_doc = Document.objects.filter(
|
||||||
storage_type=Document.STORAGE_TYPE_GPG).first()
|
storage_type=Document.STORAGE_TYPE_GPG
|
||||||
|
).first()
|
||||||
except (OperationalError, ProgrammingError, FieldError):
|
except (OperationalError, ProgrammingError, FieldError):
|
||||||
return [] # No documents table yet
|
return [] # No documents table yet
|
||||||
|
|
||||||
if encrypted_doc:
|
if encrypted_doc:
|
||||||
|
|
||||||
if not settings.PASSPHRASE:
|
if not settings.PASSPHRASE:
|
||||||
return [Error(
|
return [
|
||||||
|
Error(
|
||||||
"The database contains encrypted documents but no password "
|
"The database contains encrypted documents but no password "
|
||||||
"is set."
|
"is set."
|
||||||
)]
|
)
|
||||||
|
]
|
||||||
|
|
||||||
if not GnuPG.decrypted(encrypted_doc.source_file):
|
if not GnuPG.decrypted(encrypted_doc.source_file):
|
||||||
return [Error(textwrap.dedent(
|
return [
|
||||||
|
Error(
|
||||||
|
textwrap.dedent(
|
||||||
"""
|
"""
|
||||||
The current password doesn't match the password of the
|
The current password doesn't match the password of the
|
||||||
existing documents.
|
existing documents.
|
||||||
@ -37,7 +42,10 @@ def changed_password_check(app_configs, **kwargs):
|
|||||||
If you intend to change your password, you must first export
|
If you intend to change your password, you must first export
|
||||||
all of the old documents, start fresh with the new password
|
all of the old documents, start fresh with the new password
|
||||||
and then re-import them."
|
and then re-import them."
|
||||||
"""))]
|
"""
|
||||||
|
)
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
return []
|
return []
|
||||||
|
|
||||||
@ -50,7 +58,11 @@ def parser_check(app_configs, **kwargs):
|
|||||||
parsers.append(response[1])
|
parsers.append(response[1])
|
||||||
|
|
||||||
if len(parsers) == 0:
|
if len(parsers) == 0:
|
||||||
return [Error("No parsers found. This is a bug. The consumer won't be "
|
return [
|
||||||
"able to consume any documents without parsers.")]
|
Error(
|
||||||
|
"No parsers found. This is a bug. The consumer won't be "
|
||||||
|
"able to consume any documents without parsers."
|
||||||
|
)
|
||||||
|
]
|
||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
@ -39,8 +39,7 @@ def load_classifier():
|
|||||||
try:
|
try:
|
||||||
classifier.load()
|
classifier.load()
|
||||||
|
|
||||||
except (ClassifierModelCorruptError,
|
except (ClassifierModelCorruptError, IncompatibleClassifierVersionError):
|
||||||
IncompatibleClassifierVersionError):
|
|
||||||
# there's something wrong with the model file.
|
# there's something wrong with the model file.
|
||||||
logger.exception(
|
logger.exception(
|
||||||
f"Unrecoverable error while loading document "
|
f"Unrecoverable error while loading document "
|
||||||
@ -49,14 +48,10 @@ def load_classifier():
|
|||||||
os.unlink(settings.MODEL_FILE)
|
os.unlink(settings.MODEL_FILE)
|
||||||
classifier = None
|
classifier = None
|
||||||
except OSError:
|
except OSError:
|
||||||
logger.exception(
|
logger.exception(f"IO error while loading document classification model")
|
||||||
f"IO error while loading document classification model"
|
|
||||||
)
|
|
||||||
classifier = None
|
classifier = None
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception(
|
logger.exception(f"Unknown error while loading document classification model")
|
||||||
f"Unknown error while loading document classification model"
|
|
||||||
)
|
|
||||||
classifier = None
|
classifier = None
|
||||||
|
|
||||||
return classifier
|
return classifier
|
||||||
@ -83,7 +78,8 @@ class DocumentClassifier(object):
|
|||||||
|
|
||||||
if schema_version != self.FORMAT_VERSION:
|
if schema_version != self.FORMAT_VERSION:
|
||||||
raise IncompatibleClassifierVersionError(
|
raise IncompatibleClassifierVersionError(
|
||||||
"Cannor load classifier, incompatible versions.")
|
"Cannor load classifier, incompatible versions."
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
self.data_hash = pickle.load(f)
|
self.data_hash = pickle.load(f)
|
||||||
@ -125,30 +121,37 @@ class DocumentClassifier(object):
|
|||||||
# Step 1: Extract and preprocess training data from the database.
|
# Step 1: Extract and preprocess training data from the database.
|
||||||
logger.debug("Gathering data from database...")
|
logger.debug("Gathering data from database...")
|
||||||
m = hashlib.sha1()
|
m = hashlib.sha1()
|
||||||
for doc in Document.objects.order_by('pk').exclude(tags__is_inbox_tag=True): # NOQA: E501
|
for doc in Document.objects.order_by("pk").exclude(
|
||||||
|
tags__is_inbox_tag=True
|
||||||
|
): # NOQA: E501
|
||||||
preprocessed_content = preprocess_content(doc.content)
|
preprocessed_content = preprocess_content(doc.content)
|
||||||
m.update(preprocessed_content.encode('utf-8'))
|
m.update(preprocessed_content.encode("utf-8"))
|
||||||
data.append(preprocessed_content)
|
data.append(preprocessed_content)
|
||||||
|
|
||||||
y = -1
|
y = -1
|
||||||
dt = doc.document_type
|
dt = doc.document_type
|
||||||
if dt and dt.matching_algorithm == MatchingModel.MATCH_AUTO:
|
if dt and dt.matching_algorithm == MatchingModel.MATCH_AUTO:
|
||||||
y = dt.pk
|
y = dt.pk
|
||||||
m.update(y.to_bytes(4, 'little', signed=True))
|
m.update(y.to_bytes(4, "little", signed=True))
|
||||||
labels_document_type.append(y)
|
labels_document_type.append(y)
|
||||||
|
|
||||||
y = -1
|
y = -1
|
||||||
cor = doc.correspondent
|
cor = doc.correspondent
|
||||||
if cor and cor.matching_algorithm == MatchingModel.MATCH_AUTO:
|
if cor and cor.matching_algorithm == MatchingModel.MATCH_AUTO:
|
||||||
y = cor.pk
|
y = cor.pk
|
||||||
m.update(y.to_bytes(4, 'little', signed=True))
|
m.update(y.to_bytes(4, "little", signed=True))
|
||||||
labels_correspondent.append(y)
|
labels_correspondent.append(y)
|
||||||
|
|
||||||
tags = sorted([tag.pk for tag in doc.tags.filter(
|
tags = sorted(
|
||||||
|
[
|
||||||
|
tag.pk
|
||||||
|
for tag in doc.tags.filter(
|
||||||
matching_algorithm=MatchingModel.MATCH_AUTO
|
matching_algorithm=MatchingModel.MATCH_AUTO
|
||||||
)])
|
)
|
||||||
|
]
|
||||||
|
)
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
m.update(tag.to_bytes(4, 'little', signed=True))
|
m.update(tag.to_bytes(4, "little", signed=True))
|
||||||
labels_tags.append(tags)
|
labels_tags.append(tags)
|
||||||
|
|
||||||
if not data:
|
if not data:
|
||||||
@ -174,10 +177,7 @@ class DocumentClassifier(object):
|
|||||||
logger.debug(
|
logger.debug(
|
||||||
"{} documents, {} tag(s), {} correspondent(s), "
|
"{} documents, {} tag(s), {} correspondent(s), "
|
||||||
"{} document type(s).".format(
|
"{} document type(s).".format(
|
||||||
len(data),
|
len(data), num_tags, num_correspondents, num_document_types
|
||||||
num_tags,
|
|
||||||
num_correspondents,
|
|
||||||
num_document_types
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -188,9 +188,7 @@ class DocumentClassifier(object):
|
|||||||
# Step 2: vectorize data
|
# Step 2: vectorize data
|
||||||
logger.debug("Vectorizing data...")
|
logger.debug("Vectorizing data...")
|
||||||
self.data_vectorizer = CountVectorizer(
|
self.data_vectorizer = CountVectorizer(
|
||||||
analyzer="word",
|
analyzer="word", ngram_range=(1, 2), min_df=0.01
|
||||||
ngram_range=(1, 2),
|
|
||||||
min_df=0.01
|
|
||||||
)
|
)
|
||||||
data_vectorized = self.data_vectorizer.fit_transform(data)
|
data_vectorized = self.data_vectorizer.fit_transform(data)
|
||||||
|
|
||||||
@ -201,54 +199,41 @@ class DocumentClassifier(object):
|
|||||||
if num_tags == 1:
|
if num_tags == 1:
|
||||||
# Special case where only one tag has auto:
|
# Special case where only one tag has auto:
|
||||||
# Fallback to binary classification.
|
# Fallback to binary classification.
|
||||||
labels_tags = [label[0] if len(label) == 1 else -1
|
labels_tags = [
|
||||||
for label in labels_tags]
|
label[0] if len(label) == 1 else -1 for label in labels_tags
|
||||||
|
]
|
||||||
self.tags_binarizer = LabelBinarizer()
|
self.tags_binarizer = LabelBinarizer()
|
||||||
labels_tags_vectorized = self.tags_binarizer.fit_transform(
|
labels_tags_vectorized = self.tags_binarizer.fit_transform(
|
||||||
labels_tags).ravel()
|
labels_tags
|
||||||
|
).ravel()
|
||||||
else:
|
else:
|
||||||
self.tags_binarizer = MultiLabelBinarizer()
|
self.tags_binarizer = MultiLabelBinarizer()
|
||||||
labels_tags_vectorized = self.tags_binarizer.fit_transform(
|
labels_tags_vectorized = self.tags_binarizer.fit_transform(labels_tags)
|
||||||
labels_tags)
|
|
||||||
|
|
||||||
self.tags_classifier = MLPClassifier(tol=0.01)
|
self.tags_classifier = MLPClassifier(tol=0.01)
|
||||||
self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
|
self.tags_classifier.fit(data_vectorized, labels_tags_vectorized)
|
||||||
else:
|
else:
|
||||||
self.tags_classifier = None
|
self.tags_classifier = None
|
||||||
logger.debug(
|
logger.debug("There are no tags. Not training tags classifier.")
|
||||||
"There are no tags. Not training tags classifier."
|
|
||||||
)
|
|
||||||
|
|
||||||
if num_correspondents > 0:
|
if num_correspondents > 0:
|
||||||
logger.debug(
|
logger.debug("Training correspondent classifier...")
|
||||||
"Training correspondent classifier..."
|
|
||||||
)
|
|
||||||
self.correspondent_classifier = MLPClassifier(tol=0.01)
|
self.correspondent_classifier = MLPClassifier(tol=0.01)
|
||||||
self.correspondent_classifier.fit(
|
self.correspondent_classifier.fit(data_vectorized, labels_correspondent)
|
||||||
data_vectorized,
|
|
||||||
labels_correspondent
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
self.correspondent_classifier = None
|
self.correspondent_classifier = None
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"There are no correspondents. Not training correspondent "
|
"There are no correspondents. Not training correspondent " "classifier."
|
||||||
"classifier."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if num_document_types > 0:
|
if num_document_types > 0:
|
||||||
logger.debug(
|
logger.debug("Training document type classifier...")
|
||||||
"Training document type classifier..."
|
|
||||||
)
|
|
||||||
self.document_type_classifier = MLPClassifier(tol=0.01)
|
self.document_type_classifier = MLPClassifier(tol=0.01)
|
||||||
self.document_type_classifier.fit(
|
self.document_type_classifier.fit(data_vectorized, labels_document_type)
|
||||||
data_vectorized,
|
|
||||||
labels_document_type
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
self.document_type_classifier = None
|
self.document_type_classifier = None
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"There are no document types. Not training document type "
|
"There are no document types. Not training document type " "classifier."
|
||||||
"classifier."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self.data_hash = new_data_hash
|
self.data_hash = new_data_hash
|
||||||
@ -284,10 +269,10 @@ class DocumentClassifier(object):
|
|||||||
X = self.data_vectorizer.transform([preprocess_content(content)])
|
X = self.data_vectorizer.transform([preprocess_content(content)])
|
||||||
y = self.tags_classifier.predict(X)
|
y = self.tags_classifier.predict(X)
|
||||||
tags_ids = self.tags_binarizer.inverse_transform(y)[0]
|
tags_ids = self.tags_binarizer.inverse_transform(y)[0]
|
||||||
if type_of_target(y).startswith('multilabel'):
|
if type_of_target(y).startswith("multilabel"):
|
||||||
# the usual case when there are multiple tags.
|
# the usual case when there are multiple tags.
|
||||||
return list(tags_ids)
|
return list(tags_ids)
|
||||||
elif type_of_target(y) == 'binary' and tags_ids != -1:
|
elif type_of_target(y) == "binary" and tags_ids != -1:
|
||||||
# This is for when we have binary classification with only one
|
# This is for when we have binary classification with only one
|
||||||
# tag and the result is to assign this tag.
|
# tag and the result is to assign this tag.
|
||||||
return [tags_ids]
|
return [tags_ids]
|
||||||
|
@ -15,15 +15,11 @@ from filelock import FileLock
|
|||||||
from rest_framework.reverse import reverse
|
from rest_framework.reverse import reverse
|
||||||
|
|
||||||
from .classifier import load_classifier
|
from .classifier import load_classifier
|
||||||
from .file_handling import create_source_path_directory, \
|
from .file_handling import create_source_path_directory, generate_unique_filename
|
||||||
generate_unique_filename
|
|
||||||
from .loggers import LoggingMixin
|
from .loggers import LoggingMixin
|
||||||
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
|
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
|
||||||
from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
|
from .parsers import ParseError, get_parser_class_for_mime_type, parse_date
|
||||||
from .signals import (
|
from .signals import document_consumption_finished, document_consumption_started
|
||||||
document_consumption_finished,
|
|
||||||
document_consumption_started
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class ConsumerError(Exception):
|
class ConsumerError(Exception):
|
||||||
@ -49,23 +45,26 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
logging_name = "paperless.consumer"
|
logging_name = "paperless.consumer"
|
||||||
|
|
||||||
def _send_progress(self, current_progress, max_progress, status,
|
def _send_progress(
|
||||||
message=None, document_id=None):
|
self, current_progress, max_progress, status, message=None, document_id=None
|
||||||
|
):
|
||||||
payload = {
|
payload = {
|
||||||
'filename': os.path.basename(self.filename) if self.filename else None, # NOQA: E501
|
"filename": os.path.basename(self.filename)
|
||||||
'task_id': self.task_id,
|
if self.filename
|
||||||
'current_progress': current_progress,
|
else None, # NOQA: E501
|
||||||
'max_progress': max_progress,
|
"task_id": self.task_id,
|
||||||
'status': status,
|
"current_progress": current_progress,
|
||||||
'message': message,
|
"max_progress": max_progress,
|
||||||
'document_id': document_id
|
"status": status,
|
||||||
|
"message": message,
|
||||||
|
"document_id": document_id,
|
||||||
}
|
}
|
||||||
async_to_sync(self.channel_layer.group_send)("status_updates",
|
async_to_sync(self.channel_layer.group_send)(
|
||||||
{'type': 'status_update',
|
"status_updates", {"type": "status_update", "data": payload}
|
||||||
'data': payload})
|
)
|
||||||
|
|
||||||
def _fail(self, message, log_message=None, exc_info=None):
|
def _fail(self, message, log_message=None, exc_info=None):
|
||||||
self._send_progress(100, 100, 'FAILED', message)
|
self._send_progress(100, 100, "FAILED", message)
|
||||||
self.log("error", log_message or message, exc_info=exc_info)
|
self.log("error", log_message or message, exc_info=exc_info)
|
||||||
raise ConsumerError(f"{self.filename}: {log_message or message}")
|
raise ConsumerError(f"{self.filename}: {log_message or message}")
|
||||||
|
|
||||||
@ -84,19 +83,20 @@ class Consumer(LoggingMixin):
|
|||||||
def pre_check_file_exists(self):
|
def pre_check_file_exists(self):
|
||||||
if not os.path.isfile(self.path):
|
if not os.path.isfile(self.path):
|
||||||
self._fail(
|
self._fail(
|
||||||
MESSAGE_FILE_NOT_FOUND,
|
MESSAGE_FILE_NOT_FOUND, f"Cannot consume {self.path}: File not found."
|
||||||
f"Cannot consume {self.path}: File not found."
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def pre_check_duplicate(self):
|
def pre_check_duplicate(self):
|
||||||
with open(self.path, "rb") as f:
|
with open(self.path, "rb") as f:
|
||||||
checksum = hashlib.md5(f.read()).hexdigest()
|
checksum = hashlib.md5(f.read()).hexdigest()
|
||||||
if Document.objects.filter(Q(checksum=checksum) | Q(archive_checksum=checksum)).exists(): # NOQA: E501
|
if Document.objects.filter(
|
||||||
|
Q(checksum=checksum) | Q(archive_checksum=checksum)
|
||||||
|
).exists(): # NOQA: E501
|
||||||
if settings.CONSUMER_DELETE_DUPLICATES:
|
if settings.CONSUMER_DELETE_DUPLICATES:
|
||||||
os.unlink(self.path)
|
os.unlink(self.path)
|
||||||
self._fail(
|
self._fail(
|
||||||
MESSAGE_DOCUMENT_ALREADY_EXISTS,
|
MESSAGE_DOCUMENT_ALREADY_EXISTS,
|
||||||
f"Not consuming {self.filename}: It is a duplicate."
|
f"Not consuming {self.filename}: It is a duplicate.",
|
||||||
)
|
)
|
||||||
|
|
||||||
def pre_check_directories(self):
|
def pre_check_directories(self):
|
||||||
@ -113,10 +113,10 @@ class Consumer(LoggingMixin):
|
|||||||
self._fail(
|
self._fail(
|
||||||
MESSAGE_PRE_CONSUME_SCRIPT_NOT_FOUND,
|
MESSAGE_PRE_CONSUME_SCRIPT_NOT_FOUND,
|
||||||
f"Configured pre-consume script "
|
f"Configured pre-consume script "
|
||||||
f"{settings.PRE_CONSUME_SCRIPT} does not exist.")
|
f"{settings.PRE_CONSUME_SCRIPT} does not exist.",
|
||||||
|
)
|
||||||
|
|
||||||
self.log("info",
|
self.log("info", f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
|
||||||
f"Executing pre-consume script {settings.PRE_CONSUME_SCRIPT}")
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait()
|
Popen((settings.PRE_CONSUME_SCRIPT, self.path)).wait()
|
||||||
@ -124,7 +124,7 @@ class Consumer(LoggingMixin):
|
|||||||
self._fail(
|
self._fail(
|
||||||
MESSAGE_PRE_CONSUME_SCRIPT_ERROR,
|
MESSAGE_PRE_CONSUME_SCRIPT_ERROR,
|
||||||
f"Error while executing pre-consume script: {e}",
|
f"Error while executing pre-consume script: {e}",
|
||||||
exc_info=True
|
exc_info=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
def run_post_consume_script(self, document):
|
def run_post_consume_script(self, document):
|
||||||
@ -135,16 +135,16 @@ class Consumer(LoggingMixin):
|
|||||||
self._fail(
|
self._fail(
|
||||||
MESSAGE_POST_CONSUME_SCRIPT_NOT_FOUND,
|
MESSAGE_POST_CONSUME_SCRIPT_NOT_FOUND,
|
||||||
f"Configured post-consume script "
|
f"Configured post-consume script "
|
||||||
f"{settings.POST_CONSUME_SCRIPT} does not exist."
|
f"{settings.POST_CONSUME_SCRIPT} does not exist.",
|
||||||
)
|
)
|
||||||
|
|
||||||
self.log(
|
self.log(
|
||||||
"info",
|
"info", f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}"
|
||||||
f"Executing post-consume script {settings.POST_CONSUME_SCRIPT}"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
Popen((
|
Popen(
|
||||||
|
(
|
||||||
settings.POST_CONSUME_SCRIPT,
|
settings.POST_CONSUME_SCRIPT,
|
||||||
str(document.pk),
|
str(document.pk),
|
||||||
document.get_public_filename(),
|
document.get_public_filename(),
|
||||||
@ -153,24 +153,26 @@ class Consumer(LoggingMixin):
|
|||||||
reverse("document-download", kwargs={"pk": document.pk}),
|
reverse("document-download", kwargs={"pk": document.pk}),
|
||||||
reverse("document-thumb", kwargs={"pk": document.pk}),
|
reverse("document-thumb", kwargs={"pk": document.pk}),
|
||||||
str(document.correspondent),
|
str(document.correspondent),
|
||||||
str(",".join(document.tags.all().values_list(
|
str(",".join(document.tags.all().values_list("name", flat=True))),
|
||||||
"name", flat=True)))
|
)
|
||||||
)).wait()
|
).wait()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._fail(
|
self._fail(
|
||||||
MESSAGE_POST_CONSUME_SCRIPT_ERROR,
|
MESSAGE_POST_CONSUME_SCRIPT_ERROR,
|
||||||
f"Error while executing post-consume script: {e}",
|
f"Error while executing post-consume script: {e}",
|
||||||
exc_info=True
|
exc_info=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
def try_consume_file(self,
|
def try_consume_file(
|
||||||
|
self,
|
||||||
path,
|
path,
|
||||||
override_filename=None,
|
override_filename=None,
|
||||||
override_title=None,
|
override_title=None,
|
||||||
override_correspondent_id=None,
|
override_correspondent_id=None,
|
||||||
override_document_type_id=None,
|
override_document_type_id=None,
|
||||||
override_tag_ids=None,
|
override_tag_ids=None,
|
||||||
task_id=None):
|
task_id=None,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Return the document object if it was successfully created.
|
Return the document object if it was successfully created.
|
||||||
"""
|
"""
|
||||||
@ -183,7 +185,7 @@ class Consumer(LoggingMixin):
|
|||||||
self.override_tag_ids = override_tag_ids
|
self.override_tag_ids = override_tag_ids
|
||||||
self.task_id = task_id or str(uuid.uuid4())
|
self.task_id = task_id or str(uuid.uuid4())
|
||||||
|
|
||||||
self._send_progress(0, 100, 'STARTING', MESSAGE_NEW_FILE)
|
self._send_progress(0, 100, "STARTING", MESSAGE_NEW_FILE)
|
||||||
|
|
||||||
# this is for grouping logging entries for this particular file
|
# this is for grouping logging entries for this particular file
|
||||||
# together.
|
# together.
|
||||||
@ -206,17 +208,12 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
parser_class = get_parser_class_for_mime_type(mime_type)
|
parser_class = get_parser_class_for_mime_type(mime_type)
|
||||||
if not parser_class:
|
if not parser_class:
|
||||||
self._fail(
|
self._fail(MESSAGE_UNSUPPORTED_TYPE, f"Unsupported mime type {mime_type}")
|
||||||
MESSAGE_UNSUPPORTED_TYPE,
|
|
||||||
f"Unsupported mime type {mime_type}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Notify all listeners that we're going to do some work.
|
# Notify all listeners that we're going to do some work.
|
||||||
|
|
||||||
document_consumption_started.send(
|
document_consumption_started.send(
|
||||||
sender=self.__class__,
|
sender=self.__class__, filename=self.path, logging_group=self.logging_group
|
||||||
filename=self.path,
|
|
||||||
logging_group=self.logging_group
|
|
||||||
)
|
)
|
||||||
|
|
||||||
self.run_pre_consume_script()
|
self.run_pre_consume_script()
|
||||||
@ -243,21 +240,20 @@ class Consumer(LoggingMixin):
|
|||||||
archive_path = None
|
archive_path = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self._send_progress(20, 100, 'WORKING', MESSAGE_PARSING_DOCUMENT)
|
self._send_progress(20, 100, "WORKING", MESSAGE_PARSING_DOCUMENT)
|
||||||
self.log("debug", "Parsing {}...".format(self.filename))
|
self.log("debug", "Parsing {}...".format(self.filename))
|
||||||
document_parser.parse(self.path, mime_type, self.filename)
|
document_parser.parse(self.path, mime_type, self.filename)
|
||||||
|
|
||||||
self.log("debug", f"Generating thumbnail for {self.filename}...")
|
self.log("debug", f"Generating thumbnail for {self.filename}...")
|
||||||
self._send_progress(70, 100, 'WORKING',
|
self._send_progress(70, 100, "WORKING", MESSAGE_GENERATING_THUMBNAIL)
|
||||||
MESSAGE_GENERATING_THUMBNAIL)
|
|
||||||
thumbnail = document_parser.get_optimised_thumbnail(
|
thumbnail = document_parser.get_optimised_thumbnail(
|
||||||
self.path, mime_type, self.filename)
|
self.path, mime_type, self.filename
|
||||||
|
)
|
||||||
|
|
||||||
text = document_parser.get_text()
|
text = document_parser.get_text()
|
||||||
date = document_parser.get_date()
|
date = document_parser.get_date()
|
||||||
if not date:
|
if not date:
|
||||||
self._send_progress(90, 100, 'WORKING',
|
self._send_progress(90, 100, "WORKING", MESSAGE_PARSE_DATE)
|
||||||
MESSAGE_PARSE_DATE)
|
|
||||||
date = parse_date(self.filename, text)
|
date = parse_date(self.filename, text)
|
||||||
archive_path = document_parser.get_archive_path()
|
archive_path = document_parser.get_archive_path()
|
||||||
|
|
||||||
@ -266,7 +262,7 @@ class Consumer(LoggingMixin):
|
|||||||
self._fail(
|
self._fail(
|
||||||
str(e),
|
str(e),
|
||||||
f"Error while consuming document {self.filename}: {e}",
|
f"Error while consuming document {self.filename}: {e}",
|
||||||
exc_info=True
|
exc_info=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Prepare the document classifier.
|
# Prepare the document classifier.
|
||||||
@ -277,18 +273,14 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
classifier = load_classifier()
|
classifier = load_classifier()
|
||||||
|
|
||||||
self._send_progress(95, 100, 'WORKING', MESSAGE_SAVE_DOCUMENT)
|
self._send_progress(95, 100, "WORKING", MESSAGE_SAVE_DOCUMENT)
|
||||||
# now that everything is done, we can start to store the document
|
# now that everything is done, we can start to store the document
|
||||||
# in the system. This will be a transaction and reasonably fast.
|
# in the system. This will be a transaction and reasonably fast.
|
||||||
try:
|
try:
|
||||||
with transaction.atomic():
|
with transaction.atomic():
|
||||||
|
|
||||||
# store the document.
|
# store the document.
|
||||||
document = self._store(
|
document = self._store(text=text, date=date, mime_type=mime_type)
|
||||||
text=text,
|
|
||||||
date=date,
|
|
||||||
mime_type=mime_type
|
|
||||||
)
|
|
||||||
|
|
||||||
# If we get here, it was successful. Proceed with post-consume
|
# If we get here, it was successful. Proceed with post-consume
|
||||||
# hooks. If they fail, nothing will get changed.
|
# hooks. If they fail, nothing will get changed.
|
||||||
@ -297,7 +289,7 @@ class Consumer(LoggingMixin):
|
|||||||
sender=self.__class__,
|
sender=self.__class__,
|
||||||
document=document,
|
document=document,
|
||||||
logging_group=self.logging_group,
|
logging_group=self.logging_group,
|
||||||
classifier=classifier
|
classifier=classifier,
|
||||||
)
|
)
|
||||||
|
|
||||||
# After everything is in the database, copy the files into
|
# After everything is in the database, copy the files into
|
||||||
@ -306,24 +298,25 @@ class Consumer(LoggingMixin):
|
|||||||
document.filename = generate_unique_filename(document)
|
document.filename = generate_unique_filename(document)
|
||||||
create_source_path_directory(document.source_path)
|
create_source_path_directory(document.source_path)
|
||||||
|
|
||||||
self._write(document.storage_type,
|
self._write(document.storage_type, self.path, document.source_path)
|
||||||
self.path, document.source_path)
|
|
||||||
|
|
||||||
self._write(document.storage_type,
|
self._write(
|
||||||
thumbnail, document.thumbnail_path)
|
document.storage_type, thumbnail, document.thumbnail_path
|
||||||
|
)
|
||||||
|
|
||||||
if archive_path and os.path.isfile(archive_path):
|
if archive_path and os.path.isfile(archive_path):
|
||||||
document.archive_filename = generate_unique_filename(
|
document.archive_filename = generate_unique_filename(
|
||||||
document,
|
document, archive_filename=True
|
||||||
archive_filename=True
|
|
||||||
)
|
)
|
||||||
create_source_path_directory(document.archive_path)
|
create_source_path_directory(document.archive_path)
|
||||||
self._write(document.storage_type,
|
self._write(
|
||||||
archive_path, document.archive_path)
|
document.storage_type, archive_path, document.archive_path
|
||||||
|
)
|
||||||
|
|
||||||
with open(archive_path, 'rb') as f:
|
with open(archive_path, "rb") as f:
|
||||||
document.archive_checksum = hashlib.md5(
|
document.archive_checksum = hashlib.md5(
|
||||||
f.read()).hexdigest()
|
f.read()
|
||||||
|
).hexdigest()
|
||||||
|
|
||||||
# Don't save with the lock active. Saving will cause the file
|
# Don't save with the lock active. Saving will cause the file
|
||||||
# renaming logic to aquire the lock as well.
|
# renaming logic to aquire the lock as well.
|
||||||
@ -335,8 +328,8 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
|
# https://github.com/jonaswinkler/paperless-ng/discussions/1037
|
||||||
shadow_file = os.path.join(
|
shadow_file = os.path.join(
|
||||||
os.path.dirname(self.path),
|
os.path.dirname(self.path), "._" + os.path.basename(self.path)
|
||||||
"._" + os.path.basename(self.path))
|
)
|
||||||
|
|
||||||
if os.path.isfile(shadow_file):
|
if os.path.isfile(shadow_file):
|
||||||
self.log("debug", "Deleting file {}".format(shadow_file))
|
self.log("debug", "Deleting file {}".format(shadow_file))
|
||||||
@ -345,21 +338,17 @@ class Consumer(LoggingMixin):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
self._fail(
|
self._fail(
|
||||||
str(e),
|
str(e),
|
||||||
f"The following error occured while consuming "
|
f"The following error occured while consuming " f"{self.filename}: {e}",
|
||||||
f"{self.filename}: {e}",
|
exc_info=True,
|
||||||
exc_info=True
|
|
||||||
)
|
)
|
||||||
finally:
|
finally:
|
||||||
document_parser.cleanup()
|
document_parser.cleanup()
|
||||||
|
|
||||||
self.run_post_consume_script(document)
|
self.run_post_consume_script(document)
|
||||||
|
|
||||||
self.log(
|
self.log("info", "Document {} consumption finished".format(document))
|
||||||
"info",
|
|
||||||
"Document {} consumption finished".format(document)
|
|
||||||
)
|
|
||||||
|
|
||||||
self._send_progress(100, 100, 'SUCCESS', MESSAGE_FINISHED, document.id)
|
self._send_progress(100, 100, "SUCCESS", MESSAGE_FINISHED, document.id)
|
||||||
|
|
||||||
return document
|
return document
|
||||||
|
|
||||||
@ -373,8 +362,11 @@ class Consumer(LoggingMixin):
|
|||||||
|
|
||||||
self.log("debug", "Saving record to database")
|
self.log("debug", "Saving record to database")
|
||||||
|
|
||||||
created = file_info.created or date or timezone.make_aware(
|
created = (
|
||||||
datetime.datetime.fromtimestamp(stats.st_mtime))
|
file_info.created
|
||||||
|
or date
|
||||||
|
or timezone.make_aware(datetime.datetime.fromtimestamp(stats.st_mtime))
|
||||||
|
)
|
||||||
|
|
||||||
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
storage_type = Document.STORAGE_TYPE_UNENCRYPTED
|
||||||
|
|
||||||
@ -386,7 +378,7 @@ class Consumer(LoggingMixin):
|
|||||||
checksum=hashlib.md5(f.read()).hexdigest(),
|
checksum=hashlib.md5(f.read()).hexdigest(),
|
||||||
created=created,
|
created=created,
|
||||||
modified=created,
|
modified=created,
|
||||||
storage_type=storage_type
|
storage_type=storage_type,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.apply_overrides(document)
|
self.apply_overrides(document)
|
||||||
@ -398,11 +390,13 @@ class Consumer(LoggingMixin):
|
|||||||
def apply_overrides(self, document):
|
def apply_overrides(self, document):
|
||||||
if self.override_correspondent_id:
|
if self.override_correspondent_id:
|
||||||
document.correspondent = Correspondent.objects.get(
|
document.correspondent = Correspondent.objects.get(
|
||||||
pk=self.override_correspondent_id)
|
pk=self.override_correspondent_id
|
||||||
|
)
|
||||||
|
|
||||||
if self.override_document_type_id:
|
if self.override_document_type_id:
|
||||||
document.document_type = DocumentType.objects.get(
|
document.document_type = DocumentType.objects.get(
|
||||||
pk=self.override_document_type_id)
|
pk=self.override_document_type_id
|
||||||
|
)
|
||||||
|
|
||||||
if self.override_tag_ids:
|
if self.override_tag_ids:
|
||||||
for tag_id in self.override_tag_ids:
|
for tag_id in self.override_tag_ids:
|
||||||
|
@ -12,7 +12,6 @@ logger = logging.getLogger("paperless.filehandling")
|
|||||||
|
|
||||||
|
|
||||||
class defaultdictNoStr(defaultdict):
|
class defaultdictNoStr(defaultdict):
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
raise ValueError("Don't use {tags} directly.")
|
raise ValueError("Don't use {tags} directly.")
|
||||||
|
|
||||||
@ -63,24 +62,23 @@ def many_to_dictionary(field):
|
|||||||
mydictionary[index] = slugify(t.name)
|
mydictionary[index] = slugify(t.name)
|
||||||
|
|
||||||
# Find delimiter
|
# Find delimiter
|
||||||
delimiter = t.name.find('_')
|
delimiter = t.name.find("_")
|
||||||
|
|
||||||
if delimiter == -1:
|
if delimiter == -1:
|
||||||
delimiter = t.name.find('-')
|
delimiter = t.name.find("-")
|
||||||
|
|
||||||
if delimiter == -1:
|
if delimiter == -1:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
key = t.name[:delimiter]
|
key = t.name[:delimiter]
|
||||||
value = t.name[delimiter + 1:]
|
value = t.name[delimiter + 1 :]
|
||||||
|
|
||||||
mydictionary[slugify(key)] = slugify(value)
|
mydictionary[slugify(key)] = slugify(value)
|
||||||
|
|
||||||
return mydictionary
|
return mydictionary
|
||||||
|
|
||||||
|
|
||||||
def generate_unique_filename(doc,
|
def generate_unique_filename(doc, archive_filename=False):
|
||||||
archive_filename=False):
|
|
||||||
"""
|
"""
|
||||||
Generates a unique filename for doc in settings.ORIGINALS_DIR.
|
Generates a unique filename for doc in settings.ORIGINALS_DIR.
|
||||||
|
|
||||||
@ -104,14 +102,17 @@ def generate_unique_filename(doc,
|
|||||||
|
|
||||||
if archive_filename and doc.filename:
|
if archive_filename and doc.filename:
|
||||||
new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
|
new_filename = os.path.splitext(doc.filename)[0] + ".pdf"
|
||||||
if new_filename == old_filename or not os.path.exists(os.path.join(root, new_filename)): # NOQA: E501
|
if new_filename == old_filename or not os.path.exists(
|
||||||
|
os.path.join(root, new_filename)
|
||||||
|
): # NOQA: E501
|
||||||
return new_filename
|
return new_filename
|
||||||
|
|
||||||
counter = 0
|
counter = 0
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
new_filename = generate_filename(
|
new_filename = generate_filename(
|
||||||
doc, counter, archive_filename=archive_filename)
|
doc, counter, archive_filename=archive_filename
|
||||||
|
)
|
||||||
if new_filename == old_filename:
|
if new_filename == old_filename:
|
||||||
# still the same as before.
|
# still the same as before.
|
||||||
return new_filename
|
return new_filename
|
||||||
@ -127,14 +128,11 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if settings.PAPERLESS_FILENAME_FORMAT is not None:
|
if settings.PAPERLESS_FILENAME_FORMAT is not None:
|
||||||
tags = defaultdictNoStr(lambda: slugify(None),
|
tags = defaultdictNoStr(lambda: slugify(None), many_to_dictionary(doc.tags))
|
||||||
many_to_dictionary(doc.tags))
|
|
||||||
|
|
||||||
tag_list = pathvalidate.sanitize_filename(
|
tag_list = pathvalidate.sanitize_filename(
|
||||||
",".join(sorted(
|
",".join(sorted([tag.name for tag in doc.tags.all()])),
|
||||||
[tag.name for tag in doc.tags.all()]
|
replacement_text="-",
|
||||||
)),
|
|
||||||
replacement_text="-"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if doc.correspondent:
|
if doc.correspondent:
|
||||||
@ -157,13 +155,14 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
|||||||
asn = "none"
|
asn = "none"
|
||||||
|
|
||||||
path = settings.PAPERLESS_FILENAME_FORMAT.format(
|
path = settings.PAPERLESS_FILENAME_FORMAT.format(
|
||||||
title=pathvalidate.sanitize_filename(
|
title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"),
|
||||||
doc.title, replacement_text="-"),
|
|
||||||
correspondent=correspondent,
|
correspondent=correspondent,
|
||||||
document_type=document_type,
|
document_type=document_type,
|
||||||
created=datetime.date.isoformat(doc.created),
|
created=datetime.date.isoformat(doc.created),
|
||||||
created_year=doc.created.year if doc.created else "none",
|
created_year=doc.created.year if doc.created else "none",
|
||||||
created_month=f"{doc.created.month:02}" if doc.created else "none", # NOQA: E501
|
created_month=f"{doc.created.month:02}"
|
||||||
|
if doc.created
|
||||||
|
else "none", # NOQA: E501
|
||||||
created_day=f"{doc.created.day:02}" if doc.created else "none",
|
created_day=f"{doc.created.day:02}" if doc.created else "none",
|
||||||
added=datetime.date.isoformat(doc.added),
|
added=datetime.date.isoformat(doc.added),
|
||||||
added_year=doc.added.year if doc.added else "none",
|
added_year=doc.added.year if doc.added else "none",
|
||||||
@ -171,7 +170,7 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
|||||||
added_day=f"{doc.added.day:02}" if doc.added else "none",
|
added_day=f"{doc.added.day:02}" if doc.added else "none",
|
||||||
asn=asn,
|
asn=asn,
|
||||||
tags=tags,
|
tags=tags,
|
||||||
tag_list=tag_list
|
tag_list=tag_list,
|
||||||
).strip()
|
).strip()
|
||||||
|
|
||||||
path = path.strip(os.sep)
|
path = path.strip(os.sep)
|
||||||
@ -179,7 +178,8 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
|||||||
except (ValueError, KeyError, IndexError):
|
except (ValueError, KeyError, IndexError):
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Invalid PAPERLESS_FILENAME_FORMAT: "
|
f"Invalid PAPERLESS_FILENAME_FORMAT: "
|
||||||
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
|
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default"
|
||||||
|
)
|
||||||
|
|
||||||
counter_str = f"_{counter:02}" if counter else ""
|
counter_str = f"_{counter:02}" if counter else ""
|
||||||
|
|
||||||
|
@ -10,34 +10,24 @@ DATE_KWARGS = ["year", "month", "day", "date__gt", "gt", "date__lt", "lt"]
|
|||||||
|
|
||||||
|
|
||||||
class CorrespondentFilterSet(FilterSet):
|
class CorrespondentFilterSet(FilterSet):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Correspondent
|
model = Correspondent
|
||||||
fields = {
|
fields = {"name": CHAR_KWARGS}
|
||||||
"name": CHAR_KWARGS
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class TagFilterSet(FilterSet):
|
class TagFilterSet(FilterSet):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Tag
|
model = Tag
|
||||||
fields = {
|
fields = {"name": CHAR_KWARGS}
|
||||||
"name": CHAR_KWARGS
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class DocumentTypeFilterSet(FilterSet):
|
class DocumentTypeFilterSet(FilterSet):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = DocumentType
|
model = DocumentType
|
||||||
fields = {
|
fields = {"name": CHAR_KWARGS}
|
||||||
"name": CHAR_KWARGS
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class TagsFilter(Filter):
|
class TagsFilter(Filter):
|
||||||
|
|
||||||
def __init__(self, exclude=False, in_list=False):
|
def __init__(self, exclude=False, in_list=False):
|
||||||
super(TagsFilter, self).__init__()
|
super(TagsFilter, self).__init__()
|
||||||
self.exclude = exclude
|
self.exclude = exclude
|
||||||
@ -48,7 +38,7 @@ class TagsFilter(Filter):
|
|||||||
return qs
|
return qs
|
||||||
|
|
||||||
try:
|
try:
|
||||||
tag_ids = [int(x) for x in value.split(',')]
|
tag_ids = [int(x) for x in value.split(",")]
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return qs
|
return qs
|
||||||
|
|
||||||
@ -65,22 +55,19 @@ class TagsFilter(Filter):
|
|||||||
|
|
||||||
|
|
||||||
class InboxFilter(Filter):
|
class InboxFilter(Filter):
|
||||||
|
|
||||||
def filter(self, qs, value):
|
def filter(self, qs, value):
|
||||||
if value == 'true':
|
if value == "true":
|
||||||
return qs.filter(tags__is_inbox_tag=True)
|
return qs.filter(tags__is_inbox_tag=True)
|
||||||
elif value == 'false':
|
elif value == "false":
|
||||||
return qs.exclude(tags__is_inbox_tag=True)
|
return qs.exclude(tags__is_inbox_tag=True)
|
||||||
else:
|
else:
|
||||||
return qs
|
return qs
|
||||||
|
|
||||||
|
|
||||||
class TitleContentFilter(Filter):
|
class TitleContentFilter(Filter):
|
||||||
|
|
||||||
def filter(self, qs, value):
|
def filter(self, qs, value):
|
||||||
if value:
|
if value:
|
||||||
return qs.filter(Q(title__icontains=value) |
|
return qs.filter(Q(title__icontains=value) | Q(content__icontains=value))
|
||||||
Q(content__icontains=value))
|
|
||||||
else:
|
else:
|
||||||
return qs
|
return qs
|
||||||
|
|
||||||
@ -88,10 +75,7 @@ class TitleContentFilter(Filter):
|
|||||||
class DocumentFilterSet(FilterSet):
|
class DocumentFilterSet(FilterSet):
|
||||||
|
|
||||||
is_tagged = BooleanFilter(
|
is_tagged = BooleanFilter(
|
||||||
label="Is tagged",
|
label="Is tagged", field_name="tags", lookup_expr="isnull", exclude=True
|
||||||
field_name="tags",
|
|
||||||
lookup_expr="isnull",
|
|
||||||
exclude=True
|
|
||||||
)
|
)
|
||||||
|
|
||||||
tags__id__all = TagsFilter()
|
tags__id__all = TagsFilter()
|
||||||
@ -107,38 +91,24 @@ class DocumentFilterSet(FilterSet):
|
|||||||
class Meta:
|
class Meta:
|
||||||
model = Document
|
model = Document
|
||||||
fields = {
|
fields = {
|
||||||
|
|
||||||
"title": CHAR_KWARGS,
|
"title": CHAR_KWARGS,
|
||||||
"content": CHAR_KWARGS,
|
"content": CHAR_KWARGS,
|
||||||
|
|
||||||
"archive_serial_number": INT_KWARGS,
|
"archive_serial_number": INT_KWARGS,
|
||||||
|
|
||||||
"created": DATE_KWARGS,
|
"created": DATE_KWARGS,
|
||||||
"added": DATE_KWARGS,
|
"added": DATE_KWARGS,
|
||||||
"modified": DATE_KWARGS,
|
"modified": DATE_KWARGS,
|
||||||
|
|
||||||
"correspondent": ["isnull"],
|
"correspondent": ["isnull"],
|
||||||
"correspondent__id": ID_KWARGS,
|
"correspondent__id": ID_KWARGS,
|
||||||
"correspondent__name": CHAR_KWARGS,
|
"correspondent__name": CHAR_KWARGS,
|
||||||
|
|
||||||
"tags__id": ID_KWARGS,
|
"tags__id": ID_KWARGS,
|
||||||
"tags__name": CHAR_KWARGS,
|
"tags__name": CHAR_KWARGS,
|
||||||
|
|
||||||
"document_type": ["isnull"],
|
"document_type": ["isnull"],
|
||||||
"document_type__id": ID_KWARGS,
|
"document_type__id": ID_KWARGS,
|
||||||
"document_type__name": CHAR_KWARGS,
|
"document_type__name": CHAR_KWARGS,
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class LogFilterSet(FilterSet):
|
class LogFilterSet(FilterSet):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Log
|
model = Log
|
||||||
fields = {
|
fields = {"level": INT_KWARGS, "created": DATE_KWARGS, "group": ID_KWARGS}
|
||||||
|
|
||||||
"level": INT_KWARGS,
|
|
||||||
"created": DATE_KWARGS,
|
|
||||||
"group": ID_KWARGS
|
|
||||||
|
|
||||||
}
|
|
||||||
|
@ -21,51 +21,22 @@ logger = logging.getLogger("paperless.index")
|
|||||||
|
|
||||||
def get_schema():
|
def get_schema():
|
||||||
return Schema(
|
return Schema(
|
||||||
id=NUMERIC(
|
id=NUMERIC(stored=True, unique=True),
|
||||||
stored=True,
|
title=TEXT(sortable=True),
|
||||||
unique=True
|
|
||||||
),
|
|
||||||
title=TEXT(
|
|
||||||
sortable=True
|
|
||||||
),
|
|
||||||
content=TEXT(),
|
content=TEXT(),
|
||||||
asn=NUMERIC(
|
asn=NUMERIC(sortable=True),
|
||||||
sortable=True
|
correspondent=TEXT(sortable=True),
|
||||||
),
|
|
||||||
|
|
||||||
correspondent=TEXT(
|
|
||||||
sortable=True
|
|
||||||
),
|
|
||||||
correspondent_id=NUMERIC(),
|
correspondent_id=NUMERIC(),
|
||||||
has_correspondent=BOOLEAN(),
|
has_correspondent=BOOLEAN(),
|
||||||
|
tag=KEYWORD(commas=True, scorable=True, lowercase=True),
|
||||||
tag=KEYWORD(
|
tag_id=KEYWORD(commas=True, scorable=True),
|
||||||
commas=True,
|
|
||||||
scorable=True,
|
|
||||||
lowercase=True
|
|
||||||
),
|
|
||||||
tag_id=KEYWORD(
|
|
||||||
commas=True,
|
|
||||||
scorable=True
|
|
||||||
),
|
|
||||||
has_tag=BOOLEAN(),
|
has_tag=BOOLEAN(),
|
||||||
|
type=TEXT(sortable=True),
|
||||||
type=TEXT(
|
|
||||||
sortable=True
|
|
||||||
),
|
|
||||||
type_id=NUMERIC(),
|
type_id=NUMERIC(),
|
||||||
has_type=BOOLEAN(),
|
has_type=BOOLEAN(),
|
||||||
|
created=DATETIME(sortable=True),
|
||||||
created=DATETIME(
|
modified=DATETIME(sortable=True),
|
||||||
sortable=True
|
added=DATETIME(sortable=True),
|
||||||
),
|
|
||||||
modified=DATETIME(
|
|
||||||
sortable=True
|
|
||||||
),
|
|
||||||
added=DATETIME(
|
|
||||||
sortable=True
|
|
||||||
),
|
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -132,7 +103,7 @@ def remove_document(writer, doc):
|
|||||||
|
|
||||||
|
|
||||||
def remove_document_by_id(writer, doc_id):
|
def remove_document_by_id(writer, doc_id):
|
||||||
writer.delete_by_term('id', doc_id)
|
writer.delete_by_term("id", doc_id)
|
||||||
|
|
||||||
|
|
||||||
def add_or_update_document(document):
|
def add_or_update_document(document):
|
||||||
@@ -146,48 +117,47 @@ def remove_document_from_index(document):


class DelayedQuery:

    def _get_query(self):
        raise NotImplementedError()

    def _get_query_filter(self):
        criterias = []
        for k, v in self.query_params.items():
-            if k == 'correspondent__id':
-                criterias.append(query.Term('correspondent_id', v))
-            elif k == 'tags__id__all':
+            if k == "correspondent__id":
+                criterias.append(query.Term("correspondent_id", v))
+            elif k == "tags__id__all":
                for tag_id in v.split(","):
-                    criterias.append(query.Term('tag_id', tag_id))
-            elif k == 'document_type__id':
-                criterias.append(query.Term('type_id', v))
-            elif k == 'correspondent__isnull':
+                    criterias.append(query.Term("tag_id", tag_id))
+            elif k == "document_type__id":
+                criterias.append(query.Term("type_id", v))
+            elif k == "correspondent__isnull":
                criterias.append(query.Term("has_correspondent", v == "false"))
-            elif k == 'is_tagged':
+            elif k == "is_tagged":
                criterias.append(query.Term("has_tag", v == "true"))
-            elif k == 'document_type__isnull':
+            elif k == "document_type__isnull":
                criterias.append(query.Term("has_type", v == "false"))
-            elif k == 'created__date__lt':
+            elif k == "created__date__lt":
                criterias.append(
-                    query.DateRange("created", start=None, end=isoparse(v)))
-            elif k == 'created__date__gt':
+                    query.DateRange("created", start=None, end=isoparse(v))
+                )
+            elif k == "created__date__gt":
                criterias.append(
-                    query.DateRange("created", start=isoparse(v), end=None))
-            elif k == 'added__date__gt':
-                criterias.append(
-                    query.DateRange("added", start=isoparse(v), end=None))
-            elif k == 'added__date__lt':
-                criterias.append(
-                    query.DateRange("added", start=None, end=isoparse(v)))
+                    query.DateRange("created", start=isoparse(v), end=None)
+                )
+            elif k == "added__date__gt":
+                criterias.append(query.DateRange("added", start=isoparse(v), end=None))
+            elif k == "added__date__lt":
+                criterias.append(query.DateRange("added", start=None, end=isoparse(v)))

        if len(criterias) > 0:
            return query.And(criterias)
        else:
            return None

    def _get_query_sortedby(self):
-        if 'ordering' not in self.query_params:
+        if "ordering" not in self.query_params:
            return None, False

-        field: str = self.query_params['ordering']
+        field: str = self.query_params["ordering"]

        sort_fields_map = {
            "created": "created",
@@ -196,10 +166,10 @@ class DelayedQuery:
            "title": "title",
            "correspondent__name": "correspondent",
            "document_type__name": "type",
-            "archive_serial_number": "asn"
+            "archive_serial_number": "asn",
        }

-        if field.startswith('-'):
+        if field.startswith("-"):
            field = field[1:]
            reverse = True
        else:
@@ -235,24 +205,23 @@ class DelayedQuery:
            pagenum=math.floor(item.start / self.page_size) + 1,
            pagelen=self.page_size,
            sortedby=sortedby,
-            reverse=reverse
+            reverse=reverse,
        )
-        page.results.fragmenter = highlight.ContextFragmenter(
-            surround=50)
+        page.results.fragmenter = highlight.ContextFragmenter(surround=50)
        page.results.formatter = HtmlFormatter(tagname="span", between=" ... ")

-        if (not self.first_score and
-                len(page.results) > 0 and
-                sortedby is None):
+        if not self.first_score and len(page.results) > 0 and sortedby is None:
            self.first_score = page.results[0].score

-        page.results.top_n = list(map(
-            lambda hit: (
-                (hit[0] / self.first_score) if self.first_score else None,
-                hit[1]
-            ),
-            page.results.top_n
-        ))
+        page.results.top_n = list(
+            map(
+                lambda hit: (
+                    (hit[0] / self.first_score) if self.first_score else None,
+                    hit[1],
+                ),
+                page.results.top_n,
+            )
+        )

        self.saved_results[item.start] = page

@@ -260,12 +229,12 @@ class DelayedQuery:


class DelayedFullTextQuery(DelayedQuery):

    def _get_query(self):
-        q_str = self.query_params['query']
+        q_str = self.query_params["query"]
        qp = MultifieldParser(
            ["content", "title", "correspondent", "tag", "type"],
-            self.searcher.ixreader.schema)
+            self.searcher.ixreader.schema,
+        )
        qp.add_plugin(DateParserPlugin())
        q = qp.parse(q_str)

@@ -277,18 +246,17 @@ class DelayedFullTextQuery(DelayedQuery):


class DelayedMoreLikeThisQuery(DelayedQuery):

    def _get_query(self):
-        more_like_doc_id = int(self.query_params['more_like_id'])
+        more_like_doc_id = int(self.query_params["more_like_id"])
        content = Document.objects.get(id=more_like_doc_id).content

        docnum = self.searcher.document_number(id=more_like_doc_id)
        kts = self.searcher.key_terms_from_text(
-            'content', content, numterms=20,
-            model=classify.Bo1Model, normalize=False)
+            "content", content, numterms=20, model=classify.Bo1Model, normalize=False
+        )
        q = query.Or(
-            [query.Term('content', word, boost=weight)
-             for word, weight in kts])
+            [query.Term("content", word, boost=weight) for word, weight in kts]
+        )
        mask = {docnum}

        return q, mask
@@ -298,6 +266,7 @@ def autocomplete(ix, term, limit=10):
    with ix.reader() as reader:
        terms = []
        for (score, t) in reader.most_distinctive_terms(
-                "content", number=limit, prefix=term.lower()):
+            "content", number=limit, prefix=term.lower()
+        ):
            terms.append(t)
        return terms
@@ -17,12 +17,7 @@ class LoggingMixin:
        if self.logging_name:
            logger = logging.getLogger(self.logging_name)
        else:
-            name = ".".join([
-                self.__class__.__module__,
-                self.__class__.__name__
-            ])
+            name = ".".join([self.__class__.__module__, self.__class__.__name__])
            logger = logging.getLogger(name)

-        getattr(logger, level)(message, extra={
-            "group": self.logging_group
-        }, **kwargs)
+        getattr(logger, level)(message, extra={"group": self.logging_group}, **kwargs)
@@ -19,7 +19,7 @@ class Command(BaseCommand):
        parser.add_argument(
            "--passphrase",
            help="If PAPERLESS_PASSPHRASE isn't set already, you need to "
-                 "specify it here"
+            "specify it here",
        )

    def handle(self, *args, **options):
@@ -50,12 +50,12 @@ class Command(BaseCommand):
    def __gpg_to_unencrypted(passphrase):

        encrypted_files = Document.objects.filter(
-            storage_type=Document.STORAGE_TYPE_GPG)
+            storage_type=Document.STORAGE_TYPE_GPG
+        )

        for document in encrypted_files:

-            print("Decrypting {}".format(
-                document).encode('utf-8'))
+            print("Decrypting {}".format(document).encode("utf-8"))

            old_paths = [document.source_path, document.thumbnail_path]

@@ -66,10 +66,11 @@ class Command(BaseCommand):

            ext = os.path.splitext(document.filename)[1]

-            if not ext == '.gpg':
+            if not ext == ".gpg":
                raise CommandError(
                    f"Abort: encrypted file {document.source_path} does not "
-                    f"end with .gpg")
+                    f"end with .gpg"
+                )

            document.filename = os.path.splitext(document.filename)[0]

@@ -80,7 +81,8 @@ class Command(BaseCommand):
                f.write(raw_thumb)

            Document.objects.filter(id=document.id).update(
-                storage_type=document.storage_type, filename=document.filename)
+                storage_type=document.storage_type, filename=document.filename
+            )

            for path in old_paths:
                os.unlink(path)
@@ -16,8 +16,7 @@ from whoosh.writing import AsyncWriter

from documents.models import Document
from ... import index
-from ...file_handling import create_source_path_directory, \
-    generate_unique_filename
+from ...file_handling import create_source_path_directory, generate_unique_filename
from ...parsers import get_parser_class_for_mime_type


@@ -32,51 +31,49 @@ def handle_document(document_id):
    parser_class = get_parser_class_for_mime_type(mime_type)

    if not parser_class:
-        logger.error(f"No parser found for mime type {mime_type}, cannot "
-                     f"archive document {document} (ID: {document_id})")
+        logger.error(
+            f"No parser found for mime type {mime_type}, cannot "
+            f"archive document {document} (ID: {document_id})"
+        )
        return

    parser = parser_class(logging_group=uuid.uuid4())

    try:
-        parser.parse(
-            document.source_path,
-            mime_type,
-            document.get_public_filename())
+        parser.parse(document.source_path, mime_type, document.get_public_filename())

        thumbnail = parser.get_optimised_thumbnail(
-            document.source_path,
-            mime_type,
-            document.get_public_filename()
+            document.source_path, mime_type, document.get_public_filename()
        )

        if parser.get_archive_path():
            with transaction.atomic():
-                with open(parser.get_archive_path(), 'rb') as f:
+                with open(parser.get_archive_path(), "rb") as f:
                    checksum = hashlib.md5(f.read()).hexdigest()
                # I'm going to save first so that in case the file move
                # fails, the database is rolled back.
                # We also don't use save() since that triggers the filehandling
                # logic, and we don't want that yet (file not yet in place)
                document.archive_filename = generate_unique_filename(
-                    document, archive_filename=True)
+                    document, archive_filename=True
+                )
                Document.objects.filter(pk=document.pk).update(
                    archive_checksum=checksum,
                    content=parser.get_text(),
-                    archive_filename=document.archive_filename
+                    archive_filename=document.archive_filename,
                )
                with FileLock(settings.MEDIA_LOCK):
                    create_source_path_directory(document.archive_path)
-                    shutil.move(parser.get_archive_path(),
-                                document.archive_path)
+                    shutil.move(parser.get_archive_path(), document.archive_path)
                    shutil.move(thumbnail, document.thumbnail_path)

        with index.open_index_writer() as writer:
            index.update_document(writer, document)

    except Exception as e:
-        logger.exception(f"Error while parsing document {document} "
-                         f"(ID: {document_id})")
+        logger.exception(
+            f"Error while parsing document {document} " f"(ID: {document_id})"
+        )
    finally:
        parser.cleanup()

@@ -88,29 +85,33 @@ class Command(BaseCommand):
        and document types to all documents, effectively allowing you to
        back-tag all previously indexed documents with metadata created (or
        modified) after their initial import.
-    """.replace(" ", "")
+    """.replace(
+        " ", ""
+    )

    def add_arguments(self, parser):
        parser.add_argument(
-            "-f", "--overwrite",
+            "-f",
+            "--overwrite",
            default=False,
            action="store_true",
            help="Recreates the archived document for documents that already "
-                 "have an archived version."
+            "have an archived version.",
        )
        parser.add_argument(
-            "-d", "--document",
+            "-d",
+            "--document",
            default=None,
            type=int,
            required=False,
            help="Specify the ID of a document, and this command will only "
-                 "run on this specific document."
+            "run on this specific document.",
        )
        parser.add_argument(
            "--no-progress-bar",
            default=False,
            action="store_true",
-            help="If set, the progress bar will not be shown"
+            help="If set, the progress bar will not be shown",
        )

    def handle(self, *args, **options):
@@ -119,18 +120,17 @@ class Command(BaseCommand):

        overwrite = options["overwrite"]

-        if options['document']:
-            documents = Document.objects.filter(pk=options['document'])
+        if options["document"]:
+            documents = Document.objects.filter(pk=options["document"])
        else:
            documents = Document.objects.all()

-        document_ids = list(map(
-            lambda doc: doc.id,
-            filter(
-                lambda d: overwrite or not d.has_archive_version,
-                documents
-            )
-        ))
+        document_ids = list(
+            map(
+                lambda doc: doc.id,
+                filter(lambda d: overwrite or not d.has_archive_version, documents),
+            )
+        )

        # Note to future self: this prevents django from reusing database
        # conncetions between processes, which is bad and does not work
@@ -141,13 +141,12 @@ class Command(BaseCommand):

            logging.getLogger().handlers[0].level = logging.ERROR
            with multiprocessing.Pool(processes=settings.TASK_WORKERS) as pool:
-                list(tqdm.tqdm(
-                    pool.imap_unordered(
-                        handle_document,
-                        document_ids
-                    ),
-                    total=len(document_ids),
-                    disable=options['no_progress_bar']
-                ))
+                list(
+                    tqdm.tqdm(
+                        pool.imap_unordered(handle_document, document_ids),
+                        total=len(document_ids),
+                        disable=options["no_progress_bar"],
+                    )
+                )
        except KeyboardInterrupt:
            print("Aborting...")
@@ -26,21 +26,18 @@ def _tags_from_path(filepath):
    and get or create Tag IDs for every directory.
    """
    tag_ids = set()
-    path_parts = Path(filepath).relative_to(
-        settings.CONSUMPTION_DIR).parent.parts
+    path_parts = Path(filepath).relative_to(settings.CONSUMPTION_DIR).parent.parts
    for part in path_parts:
-        tag_ids.add(Tag.objects.get_or_create(name__iexact=part, defaults={
-            "name": part
-        })[0].pk)
+        tag_ids.add(
+            Tag.objects.get_or_create(name__iexact=part, defaults={"name": part})[0].pk
+        )

    return tag_ids


def _is_ignored(filepath: str) -> bool:
-    filepath_relative = PurePath(filepath).relative_to(
-        settings.CONSUMPTION_DIR)
-    return any(
-        filepath_relative.match(p) for p in settings.CONSUMER_IGNORE_PATTERNS)
+    filepath_relative = PurePath(filepath).relative_to(settings.CONSUMPTION_DIR)
+    return any(filepath_relative.match(p) for p in settings.CONSUMER_IGNORE_PATTERNS)


def _consume(filepath):
@@ -48,13 +45,11 @@ def _consume(filepath):
        return

    if not os.path.isfile(filepath):
-        logger.debug(
-            f"Not consuming file {filepath}: File has moved.")
+        logger.debug(f"Not consuming file {filepath}: File has moved.")
        return

    if not is_file_ext_supported(os.path.splitext(filepath)[1]):
-        logger.warning(
-            f"Not consuming file {filepath}: Unknown file extension.")
+        logger.warning(f"Not consuming file {filepath}: Unknown file extension.")
        return

    tag_ids = None
@@ -66,10 +61,12 @@ def _consume(filepath):

    try:
        logger.info(f"Adding {filepath} to the task queue.")
-        async_task("documents.tasks.consume_file",
-                   filepath,
-                   override_tag_ids=tag_ids if tag_ids else None,
-                   task_name=os.path.basename(filepath)[:100])
+        async_task(
+            "documents.tasks.consume_file",
+            filepath,
+            override_tag_ids=tag_ids if tag_ids else None,
+            task_name=os.path.basename(filepath)[:100],
+        )
    except Exception as e:
        # Catch all so that the consumer won't crash.
        # This is also what the test case is listening for to check for
@@ -88,8 +85,9 @@ def _consume_wait_unmodified(file):
    try:
        new_mtime = os.stat(file).st_mtime
    except FileNotFoundError:
-        logger.debug(f"File {file} moved while waiting for it to remain "
-                     f"unmodified.")
+        logger.debug(
+            f"File {file} moved while waiting for it to remain " f"unmodified."
+        )
        return
    if new_mtime == mtime:
        _consume(file)
@@ -102,16 +100,11 @@ def _consume_wait_unmodified(file):


class Handler(FileSystemEventHandler):

    def on_created(self, event):
-        Thread(
-            target=_consume_wait_unmodified, args=(event.src_path,)
-        ).start()
+        Thread(target=_consume_wait_unmodified, args=(event.src_path,)).start()

    def on_moved(self, event):
-        Thread(
-            target=_consume_wait_unmodified, args=(event.dest_path,)
-        ).start()
+        Thread(target=_consume_wait_unmodified, args=(event.dest_path,)).start()


class Command(BaseCommand):
@@ -130,26 +123,19 @@ class Command(BaseCommand):
            "directory",
            default=settings.CONSUMPTION_DIR,
            nargs="?",
-            help="The consumption directory."
-        )
-        parser.add_argument(
-            "--oneshot",
-            action="store_true",
-            help="Run only once."
+            help="The consumption directory.",
        )
+        parser.add_argument("--oneshot", action="store_true", help="Run only once.")

    def handle(self, *args, **options):
        directory = options["directory"]
        recursive = settings.CONSUMER_RECURSIVE

        if not directory:
-            raise CommandError(
-                "CONSUMPTION_DIR does not appear to be set."
-            )
+            raise CommandError("CONSUMPTION_DIR does not appear to be set.")

        if not os.path.isdir(directory):
-            raise CommandError(
-                f"Consumption directory {directory} does not exist")
+            raise CommandError(f"Consumption directory {directory} does not exist")

        if recursive:
            for dirpath, _, filenames in os.walk(directory):
@@ -171,8 +157,7 @@ class Command(BaseCommand):
            logger.debug("Consumer exiting.")

    def handle_polling(self, directory, recursive):
-        logger.info(
-            f"Polling directory for changes: {directory}")
+        logger.info(f"Polling directory for changes: {directory}")
        self.observer = PollingObserver(timeout=settings.CONSUMER_POLLING)
        self.observer.schedule(Handler(), directory, recursive=recursive)
        self.observer.start()
@@ -186,8 +171,7 @@ class Command(BaseCommand):
            self.observer.join()

    def handle_inotify(self, directory, recursive):
-        logger.info(
-            f"Using inotify to watch directory for changes: {directory}")
+        logger.info(f"Using inotify to watch directory for changes: {directory}")

        inotify = INotify()
        inotify_flags = flags.CLOSE_WRITE | flags.MOVED_TO
@@ -8,7 +8,9 @@ class Command(BaseCommand):
    help = """
        Trains the classifier on your data and saves the resulting models to a
        file. The document consumer will then automatically use this new model.
-    """.replace(" ", "")
+    """.replace(
+        " ", ""
+    )

    def __init__(self, *args, **kwargs):
        BaseCommand.__init__(self, *args, **kwargs)
@@ -12,10 +12,19 @@ from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from filelock import FileLock

-from documents.models import Document, Correspondent, Tag, DocumentType, \
-    SavedView, SavedViewFilterRule
-from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
-    EXPORTER_ARCHIVE_NAME
+from documents.models import (
+    Document,
+    Correspondent,
+    Tag,
+    DocumentType,
+    SavedView,
+    SavedViewFilterRule,
+)
+from documents.settings import (
+    EXPORTER_FILE_NAME,
+    EXPORTER_THUMBNAIL_NAME,
+    EXPORTER_ARCHIVE_NAME,
+)
from paperless.db import GnuPG
from paperless_mail.models import MailAccount, MailRule
from ...file_handling import generate_filename, delete_empty_directories
|
|||||||
Decrypt and rename all files in our collection into a given target
|
Decrypt and rename all files in our collection into a given target
|
||||||
directory. And include a manifest file containing document data for
|
directory. And include a manifest file containing document data for
|
||||||
easy import.
|
easy import.
|
||||||
""".replace(" ", "")
|
""".replace(
|
||||||
|
" ", ""
|
||||||
|
)
|
||||||
|
|
||||||
def add_arguments(self, parser):
|
def add_arguments(self, parser):
|
||||||
parser.add_argument("target")
|
parser.add_argument("target")
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-c", "--compare-checksums",
|
"-c",
|
||||||
|
"--compare-checksums",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Compare file checksums when determining whether to export "
|
help="Compare file checksums when determining whether to export "
|
||||||
"a file or not. If not specified, file size and time "
|
"a file or not. If not specified, file size and time "
|
||||||
"modified is used instead."
|
"modified is used instead.",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-f", "--use-filename-format",
|
"-f",
|
||||||
|
"--use-filename-format",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
|
help="Use PAPERLESS_FILENAME_FORMAT for storing files in the "
|
||||||
"export directory, if configured."
|
"export directory, if configured.",
|
||||||
)
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"-d", "--delete",
|
"-d",
|
||||||
|
"--delete",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="After exporting, delete files in the export directory that "
|
help="After exporting, delete files in the export directory that "
|
||||||
"do not belong to the current export, such as files from "
|
"do not belong to the current export, such as files from "
|
||||||
"deleted documents."
|
"deleted documents.",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--no-progress-bar",
|
"--no-progress-bar",
|
||||||
default=False,
|
default=False,
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="If set, the progress bar will not be shown"
|
help="If set, the progress bar will not be shown",
|
||||||
)
|
)
|
||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
@ -76,9 +90,9 @@ class Command(BaseCommand):
|
|||||||
def handle(self, *args, **options):
|
def handle(self, *args, **options):
|
||||||
|
|
||||||
self.target = options["target"]
|
self.target = options["target"]
|
||||||
self.compare_checksums = options['compare_checksums']
|
self.compare_checksums = options["compare_checksums"]
|
||||||
self.use_filename_format = options['use_filename_format']
|
self.use_filename_format = options["use_filename_format"]
|
||||||
self.delete = options['delete']
|
self.delete = options["delete"]
|
||||||
|
|
||||||
if not os.path.exists(self.target):
|
if not os.path.exists(self.target):
|
||||||
raise CommandError("That path doesn't exist")
|
raise CommandError("That path doesn't exist")
|
||||||
@ -87,7 +101,7 @@ class Command(BaseCommand):
|
|||||||
raise CommandError("That path doesn't appear to be writable")
|
raise CommandError("That path doesn't appear to be writable")
|
||||||
|
|
||||||
with FileLock(settings.MEDIA_LOCK):
|
with FileLock(settings.MEDIA_LOCK):
|
||||||
self.dump(options['no_progress_bar'])
|
self.dump(options["no_progress_bar"])
|
||||||
|
|
||||||
def dump(self, progress_bar_disable=False):
|
def dump(self, progress_bar_disable=False):
|
||||||
# 1. Take a snapshot of what files exist in the current export folder
|
# 1. Take a snapshot of what files exist in the current export folder
|
||||||
@@ -100,43 +114,48 @@ class Command(BaseCommand):
        # documents
        with transaction.atomic():
            manifest = json.loads(
-                serializers.serialize("json", Correspondent.objects.all()))
+                serializers.serialize("json", Correspondent.objects.all())
+            )

-            manifest += json.loads(serializers.serialize(
-                "json", Tag.objects.all()))
+            manifest += json.loads(serializers.serialize("json", Tag.objects.all()))

-            manifest += json.loads(serializers.serialize(
-                "json", DocumentType.objects.all()))
+            manifest += json.loads(
+                serializers.serialize("json", DocumentType.objects.all())
+            )

            documents = Document.objects.order_by("id")
            document_map = {d.pk: d for d in documents}
-            document_manifest = json.loads(
-                serializers.serialize("json", documents))
+            document_manifest = json.loads(serializers.serialize("json", documents))
            manifest += document_manifest

-            manifest += json.loads(serializers.serialize(
-                "json", MailAccount.objects.all()))
+            manifest += json.loads(
+                serializers.serialize("json", MailAccount.objects.all())
+            )

-            manifest += json.loads(serializers.serialize(
-                "json", MailRule.objects.all()))
+            manifest += json.loads(
+                serializers.serialize("json", MailRule.objects.all())
+            )

-            manifest += json.loads(serializers.serialize(
-                "json", SavedView.objects.all()))
+            manifest += json.loads(
+                serializers.serialize("json", SavedView.objects.all())
+            )

-            manifest += json.loads(serializers.serialize(
-                "json", SavedViewFilterRule.objects.all()))
+            manifest += json.loads(
+                serializers.serialize("json", SavedViewFilterRule.objects.all())
+            )

-            manifest += json.loads(serializers.serialize(
-                "json", User.objects.all()))
+            manifest += json.loads(serializers.serialize("json", User.objects.all()))

        # 3. Export files from each document
        for index, document_dict in tqdm.tqdm(
            enumerate(document_manifest),
            total=len(document_manifest),
-            disable=progress_bar_disable
+            disable=progress_bar_disable,
        ):
            # 3.1. store files unencrypted
-            document_dict["fields"]["storage_type"] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501
+            document_dict["fields"][
+                "storage_type"
+            ] = Document.STORAGE_TYPE_UNENCRYPTED  # NOQA: E501

            document = document_map[document_dict["pk"]]

@@ -145,11 +164,10 @@ class Command(BaseCommand):
            while True:
                if self.use_filename_format:
                    base_name = generate_filename(
-                        document, counter=filename_counter,
-                        append_gpg=False)
+                        document, counter=filename_counter, append_gpg=False
+                    )
                else:
-                    base_name = document.get_public_filename(
-                        counter=filename_counter)
+                    base_name = document.get_public_filename(counter=filename_counter)

                if base_name not in self.exported_files:
                    self.exported_files.append(base_name)
@@ -193,22 +211,19 @@ class Command(BaseCommand):
                    f.write(GnuPG.decrypted(document.archive_path))
                    os.utime(archive_target, times=(t, t))
            else:
-                self.check_and_copy(document.source_path,
-                                    document.checksum,
-                                    original_target)
+                self.check_and_copy(
+                    document.source_path, document.checksum, original_target
+                )

-                self.check_and_copy(document.thumbnail_path,
-                                    None,
-                                    thumbnail_target)
+                self.check_and_copy(document.thumbnail_path, None, thumbnail_target)

                if archive_target:
-                    self.check_and_copy(document.archive_path,
-                                        document.archive_checksum,
-                                        archive_target)
+                    self.check_and_copy(
+                        document.archive_path, document.archive_checksum, archive_target
+                    )

        # 4. write manifest to target forlder
-        manifest_path = os.path.abspath(
-            os.path.join(self.target, "manifest.json"))
+        manifest_path = os.path.abspath(os.path.join(self.target, "manifest.json"))

        with open(manifest_path, "w") as f:
            json.dump(manifest, f, indent=2)
@@ -222,8 +237,9 @@ class Command(BaseCommand):
        for f in self.files_in_export_dir:
            os.remove(f)

-            delete_empty_directories(os.path.abspath(os.path.dirname(f)),
-                                     os.path.abspath(self.target))
+            delete_empty_directories(
+                os.path.abspath(os.path.dirname(f)), os.path.abspath(self.target)
+            )

    def check_and_copy(self, source, source_checksum, target):
        if os.path.abspath(target) in self.files_in_export_dir:
@@ -12,8 +12,11 @@ from django.db.models.signals import post_save, m2m_changed
from filelock import FileLock

from documents.models import Document
-from documents.settings import EXPORTER_FILE_NAME, EXPORTER_THUMBNAIL_NAME, \
-    EXPORTER_ARCHIVE_NAME
+from documents.settings import (
+    EXPORTER_FILE_NAME,
+    EXPORTER_THUMBNAIL_NAME,
+    EXPORTER_ARCHIVE_NAME,
+)
from ...file_handling import create_source_path_directory
from ...signals.handlers import update_filename_and_move_files

@@ -32,7 +35,9 @@ class Command(BaseCommand):
    help = """
        Using a manifest.json file, load the data from there, and import the
        documents it refers to.
-    """.replace(" ", "")
+    """.replace(
+        " ", ""
+    )

    def add_arguments(self, parser):
        parser.add_argument("source")
@@ -40,7 +45,7 @@ class Command(BaseCommand):
            "--no-progress-bar",
            default=False,
            action="store_true",
-            help="If set, the progress bar will not be shown"
+            help="If set, the progress bar will not be shown",
        )

    def __init__(self, *args, **kwargs):
@@ -67,26 +72,27 @@ class Command(BaseCommand):
            self.manifest = json.load(f)

        self._check_manifest()
-        with disable_signal(post_save,
-                            receiver=update_filename_and_move_files,
-                            sender=Document):
-            with disable_signal(m2m_changed,
-                                receiver=update_filename_and_move_files,
-                                sender=Document.tags.through):
+        with disable_signal(
+            post_save, receiver=update_filename_and_move_files, sender=Document
+        ):
+            with disable_signal(
+                m2m_changed,
+                receiver=update_filename_and_move_files,
+                sender=Document.tags.through,
+            ):
                # Fill up the database with whatever is in the manifest
                call_command("loaddata", manifest_path)

-                self._import_files_from_manifest(options['no_progress_bar'])
+                self._import_files_from_manifest(options["no_progress_bar"])

        print("Updating search index...")
-        call_command('document_index', 'reindex')
+        call_command("document_index", "reindex")

    @staticmethod
    def _check_manifest_exists(path):
        if not os.path.exists(path):
            raise CommandError(
-                "That directory doesn't appear to contain a manifest.json "
-                "file."
+                "That directory doesn't appear to contain a manifest.json " "file."
            )

    def _check_manifest(self):
@@ -98,15 +104,15 @@ class Command(BaseCommand):

            if EXPORTER_FILE_NAME not in record:
                raise CommandError(
-                    'The manifest file contains a record which does not '
-                    'refer to an actual document file.'
+                    "The manifest file contains a record which does not "
+                    "refer to an actual document file."
                )

            doc_file = record[EXPORTER_FILE_NAME]
            if not os.path.exists(os.path.join(self.source, doc_file)):
                raise CommandError(
                    'The manifest file refers to "{}" which does not '
-                    'appear to be in the source directory.'.format(doc_file)
+                    "appear to be in the source directory.".format(doc_file)
                )

            if EXPORTER_ARCHIVE_NAME in record:
@@ -125,14 +131,11 @@ class Command(BaseCommand):

        print("Copy files into paperless...")

-        manifest_documents = list(filter(
-            lambda r: r["model"] == "documents.document",
-            self.manifest))
+        manifest_documents = list(
+            filter(lambda r: r["model"] == "documents.document", self.manifest)
+        )

-        for record in tqdm.tqdm(
-            manifest_documents,
-            disable=progress_bar_disable
-        ):
+        for record in tqdm.tqdm(manifest_documents, disable=progress_bar_disable):

            document = Document.objects.get(pk=record["pk"])

@@ -9,17 +9,17 @@ class Command(BaseCommand):
    help = "Manages the document index."

    def add_arguments(self, parser):
-        parser.add_argument("command", choices=['reindex', 'optimize'])
+        parser.add_argument("command", choices=["reindex", "optimize"])
        parser.add_argument(
            "--no-progress-bar",
            default=False,
            action="store_true",
-            help="If set, the progress bar will not be shown"
+            help="If set, the progress bar will not be shown",
        )

    def handle(self, *args, **options):
        with transaction.atomic():
-            if options['command'] == 'reindex':
-                index_reindex(progress_bar_disable=options['no_progress_bar'])
-            elif options['command'] == 'optimize':
+            if options["command"] == "reindex":
+                index_reindex(progress_bar_disable=options["no_progress_bar"])
+            elif options["command"] == "optimize":
                index_optimize()
@@ -11,14 +11,16 @@ class Command(BaseCommand):

    help = """
        This will rename all documents to match the latest filename format.
-    """.replace(" ", "")
+    """.replace(
+        " ", ""
+    )

    def add_arguments(self, parser):
        parser.add_argument(
            "--no-progress-bar",
            default=False,
            action="store_true",
-            help="If set, the progress bar will not be shown"
+            help="If set, the progress bar will not be shown",
        )

    def handle(self, *args, **options):
@@ -26,7 +28,6 @@ class Command(BaseCommand):
        logging.getLogger().handlers[0].level = logging.ERROR

        for document in tqdm.tqdm(
-            Document.objects.all(),
-            disable=options['no_progress_bar']
+            Document.objects.all(), disable=options["no_progress_bar"]
        ):
            post_save.send(Document, instance=document)
@@ -18,60 +18,46 @@ class Command(BaseCommand):
        and document types to all documents, effectively allowing you to
        back-tag all previously indexed documents with metadata created (or
        modified) after their initial import.
-    """.replace(" ", "")
+    """.replace(
+        " ", ""
+    )

    def add_arguments(self, parser):
-        parser.add_argument(
-            "-c", "--correspondent",
-            default=False,
-            action="store_true"
-        )
-        parser.add_argument(
-            "-T", "--tags",
-            default=False,
-            action="store_true"
-        )
-        parser.add_argument(
-            "-t", "--document_type",
-            default=False,
-            action="store_true"
-        )
-        parser.add_argument(
-            "-i", "--inbox-only",
-            default=False,
-            action="store_true"
-        )
+        parser.add_argument("-c", "--correspondent", default=False, action="store_true")
+        parser.add_argument("-T", "--tags", default=False, action="store_true")
+        parser.add_argument("-t", "--document_type", default=False, action="store_true")
+        parser.add_argument("-i", "--inbox-only", default=False, action="store_true")
        parser.add_argument(
            "--use-first",
            default=False,
            action="store_true",
            help="By default this command won't try to assign a correspondent "
-                 "if more than one matches the document. Use this flag if "
-                 "you'd rather it just pick the first one it finds."
+            "if more than one matches the document. Use this flag if "
+            "you'd rather it just pick the first one it finds.",
        )
        parser.add_argument(
-            "-f", "--overwrite",
+            "-f",
+            "--overwrite",
            default=False,
            action="store_true",
            help="If set, the document retagger will overwrite any previously"
-                 "set correspondent, document and remove correspondents, types"
-                 "and tags that do not match anymore due to changed rules."
+            "set correspondent, document and remove correspondents, types"
+            "and tags that do not match anymore due to changed rules.",
        )
        parser.add_argument(
            "--no-progress-bar",
            default=False,
            action="store_true",
-            help="If set, the progress bar will not be shown"
+            help="If set, the progress bar will not be shown",
        )
        parser.add_argument(
            "--suggest",
            default=False,
            action="store_true",
-            help="Return the suggestion, don't change anything."
+            help="Return the suggestion, don't change anything.",
        )
        parser.add_argument(
-            "--base-url",
-            help="The base URL to use to build the link to the documents."
+            "--base-url", help="The base URL to use to build the link to the documents."
        )

    def handle(self, *args, **options):
@@ -86,38 +72,39 @@ class Command(BaseCommand):

        classifier = load_classifier()

-        for document in tqdm.tqdm(
-            documents,
-            disable=options['no_progress_bar']
-        ):
+        for document in tqdm.tqdm(documents, disable=options["no_progress_bar"]):

-            if options['correspondent']:
+            if options["correspondent"]:
                set_correspondent(
                    sender=None,
                    document=document,
                    classifier=classifier,
-                    replace=options['overwrite'],
-                    use_first=options['use_first'],
-                    suggest=options['suggest'],
-                    base_url=options['base_url'],
-                    color=color)
+                    replace=options["overwrite"],
+                    use_first=options["use_first"],
+                    suggest=options["suggest"],
+                    base_url=options["base_url"],
+                    color=color,
+                )

-            if options['document_type']:
-                set_document_type(sender=None,
-                                  document=document,
-                                  classifier=classifier,
-                                  replace=options['overwrite'],
-                                  use_first=options['use_first'],
-                                  suggest=options['suggest'],
-                                  base_url=options['base_url'],
-                                  color=color)
+            if options["document_type"]:
+                set_document_type(
+                    sender=None,
+                    document=document,
+                    classifier=classifier,
+                    replace=options["overwrite"],
+                    use_first=options["use_first"],
+                    suggest=options["suggest"],
+                    base_url=options["base_url"],
+                    color=color,
+                )

-            if options['tags']:
+            if options["tags"]:
                set_tags(
                    sender=None,
                    document=document,
                    classifier=classifier,
-                    replace=options['overwrite'],
-                    suggest=options['suggest'],
-                    base_url=options['base_url'],
-                    color=color)
+                    replace=options["overwrite"],
+                    suggest=options["suggest"],
+                    base_url=options["base_url"],
+                    color=color,
+                )
@@ -6,18 +6,20 @@ class Command(BaseCommand):

    help = """
        This command checks your document archive for issues.
-    """.replace(" ", "")
+    """.replace(
+        " ", ""
+    )

    def add_arguments(self, parser):
        parser.add_argument(
            "--no-progress-bar",
            default=False,
            action="store_true",
-            help="If set, the progress bar will not be shown"
+            help="If set, the progress bar will not be shown",
        )

    def handle(self, *args, **options):

-        messages = check_sanity(progress=not options['no_progress_bar'])
+        messages = check_sanity(progress=not options["no_progress_bar"])

        messages.log_messages()
@@ -22,9 +22,7 @@ def _process_document(doc_in):

    try:
        thumb = parser.get_optimised_thumbnail(
-            document.source_path,
-            document.mime_type,
-            document.get_public_filename()
+            document.source_path, document.mime_type, document.get_public_filename()
        )

        shutil.move(thumb, document.thumbnail_path)
@@ -36,29 +34,32 @@ class Command(BaseCommand):

    help = """
        This will regenerate the thumbnails for all documents.
-    """.replace(" ", "")
+    """.replace(
+        " ", ""
+    )

    def add_arguments(self, parser):
        parser.add_argument(
-            "-d", "--document",
+            "-d",
+            "--document",
            default=None,
            type=int,
            required=False,
            help="Specify the ID of a document, and this command will only "
-                 "run on this specific document."
+            "run on this specific document.",
        )
        parser.add_argument(
            "--no-progress-bar",
            default=False,
            action="store_true",
-            help="If set, the progress bar will not be shown"
+            help="If set, the progress bar will not be shown",
        )

    def handle(self, *args, **options):
        logging.getLogger().handlers[0].level = logging.ERROR

-        if options['document']:
-            documents = Document.objects.filter(pk=options['document'])
+        if options["document"]:
+            documents = Document.objects.filter(pk=options["document"])
        else:
            documents = Document.objects.all()

@@ -70,8 +71,10 @@ class Command(BaseCommand):
        db.connections.close_all()

        with multiprocessing.Pool() as pool:
-            list(tqdm.tqdm(
-                pool.imap_unordered(_process_document, ids),
-                total=len(ids),
-                disable=options['no_progress_bar']
-            ))
+            list(
+                tqdm.tqdm(
+                    pool.imap_unordered(_process_document, ids),
+                    total=len(ids),
+                    disable=options["no_progress_bar"],
+                )
+            )
@@ -10,11 +10,11 @@ class Command(LoadDataCommand):
    """

    def parse_name(self, fixture_name):
-        self.compression_formats['stdin'] = (lambda x, y: sys.stdin, None)
-        if fixture_name == '-':
-            return '-', 'json', 'stdin'
+        self.compression_formats["stdin"] = (lambda x, y: sys.stdin, None)
+        if fixture_name == "-":
+            return "-", "json", "stdin"

    def find_fixtures(self, fixture_label):
-        if fixture_label == '-':
-            return [('-', None, '-')]
+        if fixture_label == "-":
+            return [("-", None, "-")]
        return super(Command, self).find_fixtures(fixture_label)
@@ -12,16 +12,18 @@ class Command(BaseCommand):

    help = """
        Creates a Django superuser based on env variables.
-    """.replace(" ", "")
+    """.replace(
+        " ", ""
+    )

    def handle(self, *args, **options):

-        username = os.getenv('PAPERLESS_ADMIN_USER')
+        username = os.getenv("PAPERLESS_ADMIN_USER")
        if not username:
            return

-        mail = os.getenv('PAPERLESS_ADMIN_MAIL', 'root@localhost')
-        password = os.getenv('PAPERLESS_ADMIN_PASSWORD')
+        mail = os.getenv("PAPERLESS_ADMIN_MAIL", "root@localhost")
+        password = os.getenv("PAPERLESS_ADMIN_PASSWORD")

        # Check if user exists already, leave as is if it does
        if User.objects.filter(username=username).exists():
@@ -32,11 +34,10 @@ class Command(BaseCommand):
        elif password:
            # Create superuser based on env variables
            User.objects.create_superuser(username, mail, password)
-            self.stdout.write(
-                f'Created superuser "{username}" with provided password.')
+            self.stdout.write(f'Created superuser "{username}" with provided password.')
        else:
-            self.stdout.write(
-                f'Did not create superuser "{username}".')
+            self.stdout.write(f'Did not create superuser "{username}".')
            self.stdout.write(
                'Make sure you specified "PAPERLESS_ADMIN_PASSWORD" in your '
-                '"docker-compose.env" file.')
+                '"docker-compose.env" file.'
+            )
@@ -12,7 +12,8 @@ def log_reason(matching_model, document, reason):
    class_name = type(matching_model).__name__
    logger.debug(
        f"{class_name} {matching_model.name} matched on document "
-        f"{document} because {reason}")
+        f"{document} because {reason}"
+    )


def match_correspondents(document, classifier):
@@ -23,9 +24,9 @@ def match_correspondents(document, classifier):

    correspondents = Correspondent.objects.all()

-    return list(filter(
-        lambda o: matches(o, document) or o.pk == pred_id,
-        correspondents))
+    return list(
+        filter(lambda o: matches(o, document) or o.pk == pred_id, correspondents)
+    )


def match_document_types(document, classifier):
@@ -36,9 +37,9 @@ def match_document_types(document, classifier):

    document_types = DocumentType.objects.all()

-    return list(filter(
-        lambda o: matches(o, document) or o.pk == pred_id,
-        document_types))
+    return list(
+        filter(lambda o: matches(o, document) or o.pk == pred_id, document_types)
+    )


def match_tags(document, classifier):
@@ -49,9 +50,9 @@ def match_tags(document, classifier):

    tags = Tag.objects.all()

-    return list(filter(
-        lambda o: matches(o, document) or o.pk in predicted_tag_ids,
-        tags))
+    return list(
+        filter(lambda o: matches(o, document) or o.pk in predicted_tag_ids, tags)
+    )


def matches(matching_model, document):
@ -68,73 +69,73 @@ def matches(matching_model, document):
|
|||||||
|
|
||||||
if matching_model.matching_algorithm == MatchingModel.MATCH_ALL:
|
if matching_model.matching_algorithm == MatchingModel.MATCH_ALL:
|
||||||
for word in _split_match(matching_model):
|
for word in _split_match(matching_model):
|
||||||
search_result = re.search(
|
search_result = re.search(rf"\b{word}\b", document_content, **search_kwargs)
|
||||||
rf"\b{word}\b", document_content, **search_kwargs)
|
|
||||||
if not search_result:
|
if not search_result:
|
||||||
return False
|
return False
|
||||||
log_reason(
|
log_reason(
|
||||||
matching_model, document,
|
matching_model,
|
||||||
f"it contains all of these words: {matching_model.match}"
|
document,
|
||||||
|
f"it contains all of these words: {matching_model.match}",
|
||||||
)
|
)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
elif matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
|
elif matching_model.matching_algorithm == MatchingModel.MATCH_ANY:
|
||||||
for word in _split_match(matching_model):
|
for word in _split_match(matching_model):
|
||||||
if re.search(rf"\b{word}\b", document_content, **search_kwargs):
|
if re.search(rf"\b{word}\b", document_content, **search_kwargs):
|
||||||
log_reason(
|
log_reason(matching_model, document, f"it contains this word: {word}")
|
||||||
matching_model, document,
|
|
||||||
f"it contains this word: {word}"
|
|
||||||
)
|
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
|
elif matching_model.matching_algorithm == MatchingModel.MATCH_LITERAL:
|
||||||
result = bool(re.search(
|
result = bool(
|
||||||
|
re.search(
|
||||||
rf"\b{re.escape(matching_model.match)}\b",
|
rf"\b{re.escape(matching_model.match)}\b",
|
||||||
document_content,
|
document_content,
|
||||||
**search_kwargs
|
**search_kwargs,
|
||||||
))
|
)
|
||||||
|
)
|
||||||
if result:
|
if result:
|
||||||
log_reason(
|
log_reason(
|
||||||
matching_model, document,
|
matching_model,
|
||||||
f"it contains this string: \"{matching_model.match}\""
|
document,
|
||||||
|
f'it contains this string: "{matching_model.match}"',
|
||||||
)
|
)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
|
elif matching_model.matching_algorithm == MatchingModel.MATCH_REGEX:
|
||||||
try:
|
try:
|
||||||
match = re.search(
|
match = re.search(
|
||||||
re.compile(matching_model.match, **search_kwargs),
|
re.compile(matching_model.match, **search_kwargs), document_content
|
||||||
document_content
|
|
||||||
)
|
)
|
||||||
except re.error:
|
except re.error:
|
||||||
logger.error(
|
logger.error(
|
||||||
f"Error while processing regular expression "
|
f"Error while processing regular expression " f"{matching_model.match}"
|
||||||
f"{matching_model.match}"
|
|
||||||
)
|
)
|
||||||
return False
|
return False
|
||||||
if match:
|
if match:
|
||||||
log_reason(
|
log_reason(
|
||||||
matching_model, document,
|
matching_model,
|
||||||
|
document,
|
||||||
f"the string {match.group()} matches the regular expression "
|
f"the string {match.group()} matches the regular expression "
|
||||||
f"{matching_model.match}"
|
f"{matching_model.match}",
|
||||||
)
|
)
|
||||||
return bool(match)
|
return bool(match)
|
||||||
|
|
||||||
elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
|
elif matching_model.matching_algorithm == MatchingModel.MATCH_FUZZY:
|
||||||
from fuzzywuzzy import fuzz
|
from fuzzywuzzy import fuzz
|
||||||
|
|
||||||
match = re.sub(r'[^\w\s]', '', matching_model.match)
|
match = re.sub(r"[^\w\s]", "", matching_model.match)
|
||||||
text = re.sub(r'[^\w\s]', '', document_content)
|
text = re.sub(r"[^\w\s]", "", document_content)
|
||||||
if matching_model.is_insensitive:
|
if matching_model.is_insensitive:
|
||||||
match = match.lower()
|
match = match.lower()
|
||||||
text = text.lower()
|
text = text.lower()
|
||||||
if fuzz.partial_ratio(match, text) >= 90:
|
if fuzz.partial_ratio(match, text) >= 90:
|
||||||
# TODO: make this better
|
# TODO: make this better
|
||||||
log_reason(
|
log_reason(
|
||||||
matching_model, document,
|
matching_model,
|
||||||
|
document,
|
||||||
f"parts of the document content somehow match the string "
|
f"parts of the document content somehow match the string "
|
||||||
f"{matching_model.match}"
|
f"{matching_model.match}",
|
||||||
)
|
)
|
||||||
return True
|
return True
|
||||||
else:
|
else:
|
||||||
@ -162,8 +163,6 @@ def _split_match(matching_model):
|
|||||||
normspace = re.compile(r"\s+").sub
|
normspace = re.compile(r"\s+").sub
|
||||||
return [
|
return [
|
||||||
# normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
|
# normspace(" ", (t[0] or t[1]).strip()).replace(" ", r"\s+")
|
||||||
re.escape(
|
re.escape(normspace(" ", (t[0] or t[1]).strip())).replace(r"\ ", r"\s+")
|
||||||
normspace(" ", (t[0] or t[1]).strip())
|
|
||||||
).replace(r"\ ", r"\s+")
|
|
||||||
for t in findterms(matching_model.match)
|
for t in findterms(matching_model.match)
|
||||||
]
|
]
|
||||||
|
@@ -10,19 +10,33 @@ class Migration(migrations.Migration):

     initial = True

-    dependencies = [
-    ]
+    dependencies = []

     operations = [
         migrations.CreateModel(
-            name='Document',
+            name="Document",
             fields=[
-                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
-                ('sender', models.CharField(blank=True, db_index=True, max_length=128)),
-                ('title', models.CharField(blank=True, db_index=True, max_length=128)),
-                ('content', models.TextField(db_index=("mysql" not in settings.DATABASES["default"]["ENGINE"]))),
-                ('created', models.DateTimeField(auto_now_add=True)),
-                ('modified', models.DateTimeField(auto_now=True)),
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                ("sender", models.CharField(blank=True, db_index=True, max_length=128)),
+                ("title", models.CharField(blank=True, db_index=True, max_length=128)),
+                (
+                    "content",
+                    models.TextField(
+                        db_index=(
+                            "mysql" not in settings.DATABASES["default"]["ENGINE"]
+                        )
+                    ),
+                ),
+                ("created", models.DateTimeField(auto_now_add=True)),
+                ("modified", models.DateTimeField(auto_now=True)),
             ],
         ),
     ]
@@ -9,17 +9,19 @@ import django.utils.timezone
 class Migration(migrations.Migration):

     dependencies = [
-        ('documents', '0001_initial'),
+        ("documents", "0001_initial"),
     ]

     operations = [
         migrations.AlterModelOptions(
-            name='document',
-            options={'ordering': ('sender', 'title')},
+            name="document",
+            options={"ordering": ("sender", "title")},
         ),
         migrations.AlterField(
-            model_name='document',
-            name='created',
-            field=models.DateTimeField(default=django.utils.timezone.now, editable=False),
+            model_name="document",
+            name="created",
+            field=models.DateTimeField(
+                default=django.utils.timezone.now, editable=False
+            ),
         ),
     ]
@@ -19,9 +19,11 @@ def move_sender_strings_to_sender_model(apps, schema_editor):
     # Create the sender and log the relationship with the document
     for document in document_model.objects.all():
         if document.sender:
-            DOCUMENT_SENDER_MAP[document.pk], created = sender_model.objects.get_or_create(
-                name=document.sender,
-                defaults={"slug": slugify(document.sender)}
+            (
+                DOCUMENT_SENDER_MAP[document.pk],
+                created,
+            ) = sender_model.objects.get_or_create(
+                name=document.sender, defaults={"slug": slugify(document.sender)}
             )


@@ -33,27 +35,39 @@ def realign_senders(apps, schema_editor):

 class Migration(migrations.Migration):
     dependencies = [
-        ('documents', '0002_auto_20151226_1316'),
+        ("documents", "0002_auto_20151226_1316"),
     ]

     operations = [
         migrations.CreateModel(
-            name='Sender',
+            name="Sender",
             fields=[
-                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
-                ('name', models.CharField(max_length=128, unique=True)),
-                ('slug', models.SlugField()),
+                (
+                    "id",
+                    models.AutoField(
+                        auto_created=True,
+                        primary_key=True,
+                        serialize=False,
+                        verbose_name="ID",
+                    ),
+                ),
+                ("name", models.CharField(max_length=128, unique=True)),
+                ("slug", models.SlugField()),
             ],
         ),
         migrations.RunPython(move_sender_strings_to_sender_model),
         migrations.RemoveField(
-            model_name='document',
-            name='sender',
+            model_name="document",
+            name="sender",
         ),
         migrations.AddField(
-            model_name='document',
-            name='sender',
-            field=models.ForeignKey(blank=True, on_delete=django.db.models.deletion.CASCADE, to='documents.Sender'),
+            model_name="document",
+            name="sender",
+            field=models.ForeignKey(
+                blank=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                to="documents.Sender",
+            ),
         ),
         migrations.RunPython(realign_senders),
     ]
@@ -9,13 +9,19 @@ import django.db.models.deletion
 class Migration(migrations.Migration):

     dependencies = [
-        ('documents', '0003_sender'),
+        ("documents", "0003_sender"),
     ]

     operations = [
         migrations.AlterField(
-            model_name='document',
-            name='sender',
-            field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='documents', to='documents.Sender'),
+            model_name="document",
+            name="sender",
+            field=models.ForeignKey(
+                blank=True,
+                null=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name="documents",
+                to="documents.Sender",
+            ),
         ),
     ]
@@ -8,12 +8,12 @@ from django.db import migrations
 class Migration(migrations.Migration):

     dependencies = [
-        ('documents', '0004_auto_20160114_1844'),
+        ("documents", "0004_auto_20160114_1844"),
     ]

     operations = [
         migrations.AlterModelOptions(
-            name='sender',
-            options={'ordering': ('name',)},
+            name="sender",
+            options={"ordering": ("name",)},
         ),
     ]
@ -8,30 +8,59 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0005_auto_20160123_0313'),
|
("documents", "0005_auto_20160123_0313"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
name='Tag',
|
name="Tag",
|
||||||
fields=[
|
fields=[
|
||||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
(
|
||||||
('name', models.CharField(max_length=128, unique=True)),
|
"id",
|
||||||
('slug', models.SlugField(blank=True)),
|
models.AutoField(
|
||||||
('colour', models.PositiveIntegerField(choices=[(1, '#a6cee3'), (2, '#1f78b4'), (3, '#b2df8a'), (4, '#33a02c'), (5, '#fb9a99'), (6, '#e31a1c'), (7, '#fdbf6f'), (8, '#ff7f00'), (9, '#cab2d6'), (10, '#6a3d9a'), (11, '#ffff99'), (12, '#b15928'), (13, '#000000'), (14, '#cccccc')], default=1)),
|
auto_created=True,
|
||||||
|
primary_key=True,
|
||||||
|
serialize=False,
|
||||||
|
verbose_name="ID",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("name", models.CharField(max_length=128, unique=True)),
|
||||||
|
("slug", models.SlugField(blank=True)),
|
||||||
|
(
|
||||||
|
"colour",
|
||||||
|
models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "#a6cee3"),
|
||||||
|
(2, "#1f78b4"),
|
||||||
|
(3, "#b2df8a"),
|
||||||
|
(4, "#33a02c"),
|
||||||
|
(5, "#fb9a99"),
|
||||||
|
(6, "#e31a1c"),
|
||||||
|
(7, "#fdbf6f"),
|
||||||
|
(8, "#ff7f00"),
|
||||||
|
(9, "#cab2d6"),
|
||||||
|
(10, "#6a3d9a"),
|
||||||
|
(11, "#ffff99"),
|
||||||
|
(12, "#b15928"),
|
||||||
|
(13, "#000000"),
|
||||||
|
(14, "#cccccc"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
),
|
||||||
|
),
|
||||||
],
|
],
|
||||||
options={
|
options={
|
||||||
'abstract': False,
|
"abstract": False,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='sender',
|
model_name="sender",
|
||||||
name='slug',
|
name="slug",
|
||||||
field=models.SlugField(blank=True),
|
field=models.SlugField(blank=True),
|
||||||
),
|
),
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='tags',
|
name="tags",
|
||||||
field=models.ManyToManyField(related_name='documents', to='documents.Tag'),
|
field=models.ManyToManyField(related_name="documents", to="documents.Tag"),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -8,23 +8,50 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0006_auto_20160123_0430'),
|
("documents", "0006_auto_20160123_0430"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='match',
|
name="match",
|
||||||
field=models.CharField(blank=True, max_length=256),
|
field=models.CharField(blank=True, max_length=256),
|
||||||
),
|
),
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='matching_algorithm',
|
name="matching_algorithm",
|
||||||
field=models.PositiveIntegerField(blank=True, choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression')], help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.', null=True),
|
field=models.PositiveIntegerField(
|
||||||
|
blank=True,
|
||||||
|
choices=[
|
||||||
|
(1, "Any"),
|
||||||
|
(2, "All"),
|
||||||
|
(3, "Literal"),
|
||||||
|
(4, "Regular Expression"),
|
||||||
|
],
|
||||||
|
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
|
||||||
|
null=True,
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='colour',
|
name="colour",
|
||||||
field=models.PositiveIntegerField(choices=[(1, '#a6cee3'), (2, '#1f78b4'), (3, '#b2df8a'), (4, '#33a02c'), (5, '#fb9a99'), (6, '#e31a1c'), (7, '#fdbf6f'), (8, '#ff7f00'), (9, '#cab2d6'), (10, '#6a3d9a'), (11, '#b15928'), (12, '#000000'), (13, '#cccccc')], default=1),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "#a6cee3"),
|
||||||
|
(2, "#1f78b4"),
|
||||||
|
(3, "#b2df8a"),
|
||||||
|
(4, "#33a02c"),
|
||||||
|
(5, "#fb9a99"),
|
||||||
|
(6, "#e31a1c"),
|
||||||
|
(7, "#fdbf6f"),
|
||||||
|
(8, "#ff7f00"),
|
||||||
|
(9, "#cab2d6"),
|
||||||
|
(10, "#6a3d9a"),
|
||||||
|
(11, "#b15928"),
|
||||||
|
(12, "#000000"),
|
||||||
|
(13, "#cccccc"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -8,20 +8,32 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0007_auto_20160126_2114'),
|
("documents", "0007_auto_20160126_2114"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='file_type',
|
name="file_type",
|
||||||
field=models.CharField(choices=[('pdf', 'PDF'), ('png', 'PNG'), ('jpg', 'JPG'), ('gif', 'GIF'), ('tiff', 'TIFF')], default='pdf', editable=False, max_length=4),
|
field=models.CharField(
|
||||||
|
choices=[
|
||||||
|
("pdf", "PDF"),
|
||||||
|
("png", "PNG"),
|
||||||
|
("jpg", "JPG"),
|
||||||
|
("gif", "GIF"),
|
||||||
|
("tiff", "TIFF"),
|
||||||
|
],
|
||||||
|
default="pdf",
|
||||||
|
editable=False,
|
||||||
|
max_length=4,
|
||||||
|
),
|
||||||
preserve_default=False,
|
preserve_default=False,
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='tags',
|
name="tags",
|
||||||
field=models.ManyToManyField(blank=True, related_name='documents', to='documents.Tag'),
|
field=models.ManyToManyField(
|
||||||
|
blank=True, related_name="documents", to="documents.Tag"
|
||||||
|
),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
@ -8,13 +8,22 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0008_document_file_type'),
|
("documents", "0008_document_file_type"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='matching_algorithm',
|
name="matching_algorithm",
|
||||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "Any"),
|
||||||
|
(2, "All"),
|
||||||
|
(3, "Literal"),
|
||||||
|
(4, "Regular Expression"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
|
||||||
|
),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -8,23 +8,48 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0009_auto_20160214_0040'),
|
("documents", "0009_auto_20160214_0040"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
name='Log',
|
name="Log",
|
||||||
fields=[
|
fields=[
|
||||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
(
|
||||||
('group', models.UUIDField(blank=True)),
|
"id",
|
||||||
('message', models.TextField()),
|
models.AutoField(
|
||||||
('level', models.PositiveIntegerField(choices=[(10, 'Debugging'), (20, 'Informational'), (30, 'Warning'), (40, 'Error'), (50, 'Critical')], default=20)),
|
auto_created=True,
|
||||||
('component', models.PositiveIntegerField(choices=[(1, 'Consumer'), (2, 'Mail Fetcher')])),
|
primary_key=True,
|
||||||
('created', models.DateTimeField(auto_now_add=True)),
|
serialize=False,
|
||||||
('modified', models.DateTimeField(auto_now=True)),
|
verbose_name="ID",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("group", models.UUIDField(blank=True)),
|
||||||
|
("message", models.TextField()),
|
||||||
|
(
|
||||||
|
"level",
|
||||||
|
models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(10, "Debugging"),
|
||||||
|
(20, "Informational"),
|
||||||
|
(30, "Warning"),
|
||||||
|
(40, "Error"),
|
||||||
|
(50, "Critical"),
|
||||||
|
],
|
||||||
|
default=20,
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"component",
|
||||||
|
models.PositiveIntegerField(
|
||||||
|
choices=[(1, "Consumer"), (2, "Mail Fetcher")]
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("created", models.DateTimeField(auto_now_add=True)),
|
||||||
|
("modified", models.DateTimeField(auto_now=True)),
|
||||||
],
|
],
|
||||||
options={
|
options={
|
||||||
'ordering': ('-modified',),
|
"ordering": ("-modified",),
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@@ -8,21 +8,21 @@ from django.db import migrations
 class Migration(migrations.Migration):
     atomic = False
     dependencies = [
-        ('documents', '0010_log'),
+        ("documents", "0010_log"),
     ]

     operations = [
         migrations.RenameModel(
-            old_name='Sender',
-            new_name='Correspondent',
+            old_name="Sender",
+            new_name="Correspondent",
         ),
         migrations.AlterModelOptions(
-            name='document',
-            options={'ordering': ('correspondent', 'title')},
+            name="document",
+            options={"ordering": ("correspondent", "title")},
         ),
         migrations.RenameField(
-            model_name='document',
-            old_name='sender',
-            new_name='correspondent',
+            model_name="document",
+            old_name="sender",
+            new_name="correspondent",
         ),
     ]
@ -23,37 +23,40 @@ class GnuPG(object):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def decrypted(cls, file_handle):
|
def decrypted(cls, file_handle):
|
||||||
return cls.gpg.decrypt_file(
|
return cls.gpg.decrypt_file(file_handle, passphrase=settings.PASSPHRASE).data
|
||||||
file_handle, passphrase=settings.PASSPHRASE).data
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def encrypted(cls, file_handle):
|
def encrypted(cls, file_handle):
|
||||||
return cls.gpg.encrypt_file(
|
return cls.gpg.encrypt_file(
|
||||||
file_handle,
|
file_handle, recipients=None, passphrase=settings.PASSPHRASE, symmetric=True
|
||||||
recipients=None,
|
|
||||||
passphrase=settings.PASSPHRASE,
|
|
||||||
symmetric=True
|
|
||||||
).data
|
).data
|
||||||
|
|
||||||
|
|
||||||
def move_documents_and_create_thumbnails(apps, schema_editor):
|
def move_documents_and_create_thumbnails(apps, schema_editor):
|
||||||
|
|
||||||
os.makedirs(os.path.join(settings.MEDIA_ROOT, "documents", "originals"), exist_ok=True)
|
os.makedirs(
|
||||||
os.makedirs(os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails"), exist_ok=True)
|
os.path.join(settings.MEDIA_ROOT, "documents", "originals"), exist_ok=True
|
||||||
|
)
|
||||||
|
os.makedirs(
|
||||||
|
os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails"), exist_ok=True
|
||||||
|
)
|
||||||
|
|
||||||
documents = os.listdir(os.path.join(settings.MEDIA_ROOT, "documents"))
|
documents = os.listdir(os.path.join(settings.MEDIA_ROOT, "documents"))
|
||||||
|
|
||||||
if set(documents) == {"originals", "thumbnails"}:
|
if set(documents) == {"originals", "thumbnails"}:
|
||||||
return
|
return
|
||||||
|
|
||||||
print(colourise(
|
print(
|
||||||
|
colourise(
|
||||||
"\n\n"
|
"\n\n"
|
||||||
" This is a one-time only migration to generate thumbnails for all of your\n"
|
" This is a one-time only migration to generate thumbnails for all of your\n"
|
||||||
" documents so that future UIs will have something to work with. If you have\n"
|
" documents so that future UIs will have something to work with. If you have\n"
|
||||||
" a lot of documents though, this may take a while, so a coffee break may be\n"
|
" a lot of documents though, this may take a while, so a coffee break may be\n"
|
||||||
" in order."
|
" in order."
|
||||||
"\n", opts=("bold",)
|
"\n",
|
||||||
))
|
opts=("bold",),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
os.makedirs(settings.SCRATCH_DIR)
|
os.makedirs(settings.SCRATCH_DIR)
|
||||||
@ -65,16 +68,16 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
|
|||||||
if not f.endswith("gpg"):
|
if not f.endswith("gpg"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
print(" {} {} {}".format(
|
print(
|
||||||
|
" {} {} {}".format(
|
||||||
colourise("*", fg="green"),
|
colourise("*", fg="green"),
|
||||||
colourise("Generating a thumbnail for", fg="white"),
|
colourise("Generating a thumbnail for", fg="white"),
|
||||||
colourise(f, fg="cyan")
|
colourise(f, fg="cyan"),
|
||||||
))
|
)
|
||||||
|
)
|
||||||
|
|
||||||
thumb_temp = tempfile.mkdtemp(
|
thumb_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
|
||||||
prefix="paperless", dir=settings.SCRATCH_DIR)
|
orig_temp = tempfile.mkdtemp(prefix="paperless", dir=settings.SCRATCH_DIR)
|
||||||
orig_temp = tempfile.mkdtemp(
|
|
||||||
prefix="paperless", dir=settings.SCRATCH_DIR)
|
|
||||||
|
|
||||||
orig_source = os.path.join(settings.MEDIA_ROOT, "documents", f)
|
orig_source = os.path.join(settings.MEDIA_ROOT, "documents", f)
|
||||||
orig_target = os.path.join(orig_temp, f.replace(".gpg", ""))
|
orig_target = os.path.join(orig_temp, f.replace(".gpg", ""))
|
||||||
@ -83,20 +86,24 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
|
|||||||
with open(orig_target, "wb") as unencrypted:
|
with open(orig_target, "wb") as unencrypted:
|
||||||
unencrypted.write(GnuPG.decrypted(encrypted))
|
unencrypted.write(GnuPG.decrypted(encrypted))
|
||||||
|
|
||||||
subprocess.Popen((
|
subprocess.Popen(
|
||||||
|
(
|
||||||
settings.CONVERT_BINARY,
|
settings.CONVERT_BINARY,
|
||||||
"-scale", "500x5000",
|
"-scale",
|
||||||
"-alpha", "remove",
|
"500x5000",
|
||||||
|
"-alpha",
|
||||||
|
"remove",
|
||||||
orig_target,
|
orig_target,
|
||||||
os.path.join(thumb_temp, "convert-%04d.png")
|
os.path.join(thumb_temp, "convert-%04d.png"),
|
||||||
)).wait()
|
)
|
||||||
|
).wait()
|
||||||
|
|
||||||
thumb_source = os.path.join(thumb_temp, "convert-0000.png")
|
thumb_source = os.path.join(thumb_temp, "convert-0000.png")
|
||||||
thumb_target = os.path.join(
|
thumb_target = os.path.join(
|
||||||
settings.MEDIA_ROOT,
|
settings.MEDIA_ROOT,
|
||||||
"documents",
|
"documents",
|
||||||
"thumbnails",
|
"thumbnails",
|
||||||
re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f)
|
re.sub(r"(\d+)\.\w+(\.gpg)", "\\1.png\\2", f),
|
||||||
)
|
)
|
||||||
with open(thumb_source, "rb") as unencrypted:
|
with open(thumb_source, "rb") as unencrypted:
|
||||||
with open(thumb_target, "wb") as encrypted:
|
with open(thumb_target, "wb") as encrypted:
|
||||||
@ -113,7 +120,7 @@ def move_documents_and_create_thumbnails(apps, schema_editor):
|
|||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0011_auto_20160303_1929'),
|
("documents", "0011_auto_20160303_1929"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
|
@ -9,27 +9,36 @@ import django.utils.timezone
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0012_auto_20160305_0040'),
|
("documents", "0012_auto_20160305_0040"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='correspondent',
|
model_name="correspondent",
|
||||||
name='match',
|
name="match",
|
||||||
field=models.CharField(blank=True, max_length=256),
|
field=models.CharField(blank=True, max_length=256),
|
||||||
),
|
),
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='correspondent',
|
model_name="correspondent",
|
||||||
name='matching_algorithm',
|
name="matching_algorithm",
|
||||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "Any"),
|
||||||
|
(2, "All"),
|
||||||
|
(3, "Literal"),
|
||||||
|
(4, "Regular Expression"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. If you don\'t know what a regex is, you probably don\'t want this option.',
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='created',
|
name="created",
|
||||||
field=models.DateTimeField(default=django.utils.timezone.now),
|
field=models.DateTimeField(default=django.utils.timezone.now),
|
||||||
),
|
),
|
||||||
migrations.RemoveField(
|
migrations.RemoveField(
|
||||||
model_name='log',
|
model_name="log",
|
||||||
name='component',
|
name="component",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -22,16 +22,12 @@ class GnuPG(object):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def decrypted(cls, file_handle):
|
def decrypted(cls, file_handle):
|
||||||
return cls.gpg.decrypt_file(
|
return cls.gpg.decrypt_file(file_handle, passphrase=settings.PASSPHRASE).data
|
||||||
file_handle, passphrase=settings.PASSPHRASE).data
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def encrypted(cls, file_handle):
|
def encrypted(cls, file_handle):
|
||||||
return cls.gpg.encrypt_file(
|
return cls.gpg.encrypt_file(
|
||||||
file_handle,
|
file_handle, recipients=None, passphrase=settings.PASSPHRASE, symmetric=True
|
||||||
recipients=None,
|
|
||||||
passphrase=settings.PASSPHRASE,
|
|
||||||
symmetric=True
|
|
||||||
).data
|
).data
|
||||||
|
|
||||||
|
|
||||||
@ -53,8 +49,7 @@ class Document(object):
|
|||||||
def __str__(self):
|
def __str__(self):
|
||||||
created = self.created.strftime("%Y%m%d%H%M%S")
|
created = self.created.strftime("%Y%m%d%H%M%S")
|
||||||
if self.correspondent and self.title:
|
if self.correspondent and self.title:
|
||||||
return "{}: {} - {}".format(
|
return "{}: {} - {}".format(created, self.correspondent, self.title)
|
||||||
created, self.correspondent, self.title)
|
|
||||||
if self.correspondent or self.title:
|
if self.correspondent or self.title:
|
||||||
return "{}: {}".format(created, self.correspondent or self.title)
|
return "{}: {}".format(created, self.correspondent or self.title)
|
||||||
return str(created)
|
return str(created)
|
||||||
@ -65,7 +60,7 @@ class Document(object):
|
|||||||
settings.MEDIA_ROOT,
|
settings.MEDIA_ROOT,
|
||||||
"documents",
|
"documents",
|
||||||
"originals",
|
"originals",
|
||||||
"{:07}.{}.gpg".format(self.pk, self.file_type)
|
"{:07}.{}.gpg".format(self.pk, self.file_type),
|
||||||
)
|
)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@ -84,38 +79,62 @@ def set_checksums(apps, schema_editor):
|
|||||||
if not document_model.objects.all().exists():
|
if not document_model.objects.all().exists():
|
||||||
return
|
return
|
||||||
|
|
||||||
print(colourise(
|
print(
|
||||||
|
colourise(
|
||||||
"\n\n"
|
"\n\n"
|
||||||
" This is a one-time only migration to generate checksums for all\n"
|
" This is a one-time only migration to generate checksums for all\n"
|
||||||
" of your existing documents. If you have a lot of documents\n"
|
" of your existing documents. If you have a lot of documents\n"
|
||||||
" though, this may take a while, so a coffee break may be in\n"
|
" though, this may take a while, so a coffee break may be in\n"
|
||||||
" order."
|
" order."
|
||||||
"\n", opts=("bold",)
|
"\n",
|
||||||
))
|
opts=("bold",),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
sums = {}
|
sums = {}
|
||||||
for d in document_model.objects.all():
|
for d in document_model.objects.all():
|
||||||
|
|
||||||
document = Document(d)
|
document = Document(d)
|
||||||
|
|
||||||
print(" {} {} {}".format(
|
print(
|
||||||
|
" {} {} {}".format(
|
||||||
colourise("*", fg="green"),
|
colourise("*", fg="green"),
|
||||||
colourise("Generating a checksum for", fg="white"),
|
colourise("Generating a checksum for", fg="white"),
|
||||||
colourise(document.file_name, fg="cyan")
|
colourise(document.file_name, fg="cyan"),
|
||||||
))
|
)
|
||||||
|
)
|
||||||
|
|
||||||
with document.source_file as encrypted:
|
with document.source_file as encrypted:
|
||||||
checksum = hashlib.md5(GnuPG.decrypted(encrypted)).hexdigest()
|
checksum = hashlib.md5(GnuPG.decrypted(encrypted)).hexdigest()
|
||||||
|
|
||||||
if checksum in sums:
|
if checksum in sums:
|
||||||
error = "\n{line}{p1}\n\n{doc1}\n{doc2}\n\n{p2}\n\n{code}\n\n{p3}{line}".format(
|
error = "\n{line}{p1}\n\n{doc1}\n{doc2}\n\n{p2}\n\n{code}\n\n{p3}{line}".format(
|
||||||
p1=colourise("It appears that you have two identical documents in your collection and \nPaperless no longer supports this (see issue #97). The documents in question\nare:", fg="yellow"),
|
p1=colourise(
|
||||||
p2=colourise("To fix this problem, you'll have to remove one of them from the database, a task\nmost easily done by running the following command in the same\ndirectory as manage.py:", fg="yellow"),
|
"It appears that you have two identical documents in your collection and \nPaperless no longer supports this (see issue #97). The documents in question\nare:",
|
||||||
p3=colourise("When that's finished, re-run the migrate, and provided that there aren't any\nother duplicates, you should be good to go.", fg="yellow"),
|
fg="yellow",
|
||||||
doc1=colourise(" * {} (id: {})".format(sums[checksum][1], sums[checksum][0]), fg="red"),
|
),
|
||||||
doc2=colourise(" * {} (id: {})".format(document.file_name, document.pk), fg="red"),
|
p2=colourise(
|
||||||
code=colourise(" $ echo 'DELETE FROM documents_document WHERE id = {pk};' | ./manage.py dbshell".format(pk=document.pk), fg="green"),
|
"To fix this problem, you'll have to remove one of them from the database, a task\nmost easily done by running the following command in the same\ndirectory as manage.py:",
|
||||||
line=colourise("\n{}\n".format("=" * 80), fg="white", opts=("bold",))
|
fg="yellow",
|
||||||
|
),
|
||||||
|
p3=colourise(
|
||||||
|
"When that's finished, re-run the migrate, and provided that there aren't any\nother duplicates, you should be good to go.",
|
||||||
|
fg="yellow",
|
||||||
|
),
|
||||||
|
doc1=colourise(
|
||||||
|
" * {} (id: {})".format(sums[checksum][1], sums[checksum][0]),
|
||||||
|
fg="red",
|
||||||
|
),
|
||||||
|
doc2=colourise(
|
||||||
|
" * {} (id: {})".format(document.file_name, document.pk), fg="red"
|
||||||
|
),
|
||||||
|
code=colourise(
|
||||||
|
" $ echo 'DELETE FROM documents_document WHERE id = {pk};' | ./manage.py dbshell".format(
|
||||||
|
pk=document.pk
|
||||||
|
),
|
||||||
|
fg="green",
|
||||||
|
),
|
||||||
|
line=colourise("\n{}\n".format("=" * 80), fg="white", opts=("bold",)),
|
||||||
)
|
)
|
||||||
raise RuntimeError(error)
|
raise RuntimeError(error)
|
||||||
sums[checksum] = (document.pk, document.file_name)
|
sums[checksum] = (document.pk, document.file_name)
|
||||||
@ -129,33 +148,35 @@ def do_nothing(apps, schema_editor):
|
|||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0013_auto_20160325_2111'),
|
("documents", "0013_auto_20160325_2111"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='checksum',
|
name="checksum",
|
||||||
field=models.CharField(
|
field=models.CharField(
|
||||||
default='-',
|
default="-",
|
||||||
db_index=True,
|
db_index=True,
|
||||||
editable=False,
|
editable=False,
|
||||||
max_length=32,
|
max_length=32,
|
||||||
help_text='The checksum of the original document (before it '
|
help_text="The checksum of the original document (before it "
|
||||||
'was encrypted). We use this to prevent duplicate '
|
"was encrypted). We use this to prevent duplicate "
|
||||||
'document imports.',
|
"document imports.",
|
||||||
),
|
),
|
||||||
preserve_default=False,
|
preserve_default=False,
|
||||||
),
|
),
|
||||||
migrations.RunPython(set_checksums, do_nothing),
|
migrations.RunPython(set_checksums, do_nothing),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='created',
|
name="created",
|
||||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now),
|
field=models.DateTimeField(
|
||||||
|
db_index=True, default=django.utils.timezone.now
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='modified',
|
name="modified",
|
||||||
field=models.DateTimeField(auto_now=True, db_index=True),
|
field=models.DateTimeField(auto_now=True, db_index=True),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -8,23 +8,28 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0014_document_checksum'),
|
("documents", "0014_document_checksum"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='checksum',
|
name="checksum",
|
||||||
field=models.CharField(editable=False, help_text='The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.', max_length=32, unique=True),
|
field=models.CharField(
|
||||||
|
editable=False,
|
||||||
|
help_text="The checksum of the original document (before it was encrypted). We use this to prevent duplicate document imports.",
|
||||||
|
max_length=32,
|
||||||
|
unique=True,
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='correspondent',
|
model_name="correspondent",
|
||||||
name='is_insensitive',
|
name="is_insensitive",
|
||||||
field=models.BooleanField(default=True),
|
field=models.BooleanField(default=True),
|
||||||
),
|
),
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='is_insensitive',
|
name="is_insensitive",
|
||||||
field=models.BooleanField(default=True),
|
field=models.BooleanField(default=True),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -9,13 +9,17 @@ from django.conf import settings
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0015_add_insensitive_to_match'),
|
("documents", "0015_add_insensitive_to_match"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='content',
|
name="content",
|
||||||
field=models.TextField(blank=True, db_index=("mysql" not in settings.DATABASES["default"]["ENGINE"]), help_text='The raw, text-only data of the document. This field is primarily used for searching.'),
|
field=models.TextField(
|
||||||
|
blank=True,
|
||||||
|
db_index=("mysql" not in settings.DATABASES["default"]["ENGINE"]),
|
||||||
|
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -8,18 +8,38 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0016_auto_20170325_1558'),
|
("documents", "0016_auto_20170325_1558"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='correspondent',
|
model_name="correspondent",
|
||||||
name='matching_algorithm',
|
name="matching_algorithm",
|
||||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "Any"),
|
||||||
|
(2, "All"),
|
||||||
|
(3, "Literal"),
|
||||||
|
(4, "Regular Expression"),
|
||||||
|
(5, "Fuzzy Match"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.',
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='matching_algorithm',
|
name="matching_algorithm",
|
||||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "Any"),
|
||||||
|
(2, "All"),
|
||||||
|
(3, "Literal"),
|
||||||
|
(4, "Regular Expression"),
|
||||||
|
(5, "Fuzzy Match"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.',
|
||||||
|
),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -9,13 +9,19 @@ import django.db.models.deletion
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0017_auto_20170512_0507'),
|
("documents", "0017_auto_20170512_0507"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='correspondent',
|
name="correspondent",
|
||||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.Correspondent'),
|
field=models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.SET_NULL,
|
||||||
|
related_name="documents",
|
||||||
|
to="documents.Correspondent",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@@ -16,7 +16,7 @@ def reverse_func(apps, schema_editor):

 class Migration(migrations.Migration):
     dependencies = [
-        ('documents', '0018_auto_20170715_1712'),
+        ("documents", "0018_auto_20170715_1712"),
     ]

     operations = [
@@ -14,14 +14,16 @@ def set_added_time_to_created_time(apps, schema_editor):

 class Migration(migrations.Migration):
     dependencies = [
-        ('documents', '0019_add_consumer_user'),
+        ("documents", "0019_add_consumer_user"),
     ]

     operations = [
         migrations.AddField(
-            model_name='document',
-            name='added',
-            field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, editable=False),
+            model_name="document",
+            name="added",
+            field=models.DateTimeField(
+                db_index=True, default=django.utils.timezone.now, editable=False
+            ),
         ),
-        migrations.RunPython(set_added_time_to_created_time)
+        migrations.RunPython(set_added_time_to_created_time),
     ]
@ -8,23 +8,36 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0020_document_added'),
|
("documents", "0020_document_added"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
|
|
||||||
# Add the field with the default GPG-encrypted value
|
# Add the field with the default GPG-encrypted value
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='storage_type',
|
name="storage_type",
|
||||||
field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='gpg', editable=False, max_length=11),
|
field=models.CharField(
|
||||||
|
choices=[
|
||||||
|
("unencrypted", "Unencrypted"),
|
||||||
|
("gpg", "Encrypted with GNU Privacy Guard"),
|
||||||
|
],
|
||||||
|
default="gpg",
|
||||||
|
editable=False,
|
||||||
|
max_length=11,
|
||||||
|
),
|
||||||
),
|
),
|
||||||
|
|
||||||
# Now that the field is added, change the default to unencrypted
|
# Now that the field is added, change the default to unencrypted
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='storage_type',
|
name="storage_type",
|
||||||
field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='unencrypted', editable=False, max_length=11),
|
field=models.CharField(
|
||||||
|
choices=[
|
||||||
|
("unencrypted", "Unencrypted"),
|
||||||
|
("gpg", "Encrypted with GNU Privacy Guard"),
|
||||||
|
],
|
||||||
|
default="unencrypted",
|
||||||
|
editable=False,
|
||||||
|
max_length=11,
|
||||||
|
),
|
||||||
),
|
),
|
||||||
|
|
||||||
]
|
]
|
||||||
|
@ -15,38 +15,47 @@ def re_slug_all_the_things(apps, schema_editor):
|
|||||||
|
|
||||||
for klass in (Tag, Correspondent):
|
for klass in (Tag, Correspondent):
|
||||||
for instance in klass.objects.all():
|
for instance in klass.objects.all():
|
||||||
klass.objects.filter(
|
klass.objects.filter(pk=instance.pk).update(slug=slugify(instance.slug))
|
||||||
pk=instance.pk
|
|
||||||
).update(
|
|
||||||
slug=slugify(instance.slug)
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0021_document_storage_type'),
|
("documents", "0021_document_storage_type"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='tag',
|
name="tag",
|
||||||
options={'ordering': ('name',)},
|
options={"ordering": ("name",)},
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='correspondent',
|
model_name="correspondent",
|
||||||
name='slug',
|
name="slug",
|
||||||
field=models.SlugField(blank=True, editable=False),
|
field=models.SlugField(blank=True, editable=False),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='file_type',
|
name="file_type",
|
||||||
field=models.CharField(choices=[('pdf', 'PDF'), ('png', 'PNG'), ('jpg', 'JPG'), ('gif', 'GIF'), ('tiff', 'TIFF'), ('txt', 'TXT'), ('csv', 'CSV'), ('md', 'MD')], editable=False, max_length=4),
|
field=models.CharField(
|
||||||
|
choices=[
|
||||||
|
("pdf", "PDF"),
|
||||||
|
("png", "PNG"),
|
||||||
|
("jpg", "JPG"),
|
||||||
|
("gif", "GIF"),
|
||||||
|
("tiff", "TIFF"),
|
||||||
|
("txt", "TXT"),
|
||||||
|
("csv", "CSV"),
|
||||||
|
("md", "MD"),
|
||||||
|
],
|
||||||
|
editable=False,
|
||||||
|
max_length=4,
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='slug',
|
name="slug",
|
||||||
field=models.SlugField(blank=True, editable=False),
|
field=models.SlugField(blank=True, editable=False),
|
||||||
),
|
),
|
||||||
migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop)
|
migrations.RunPython(re_slug_all_the_things, migrations.RunPython.noop),
|
||||||
]
|
]
|
||||||
|
@ -20,18 +20,20 @@ def set_filename(apps, schema_editor):
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0022_auto_20181007_1420'),
|
("documents", "0022_auto_20181007_1420"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='filename',
|
name="filename",
|
||||||
field=models.FilePathField(default=None,
|
field=models.FilePathField(
|
||||||
|
default=None,
|
||||||
null=True,
|
null=True,
|
||||||
editable=False,
|
editable=False,
|
||||||
help_text='Current filename in storage',
|
help_text="Current filename in storage",
|
||||||
max_length=256),
|
max_length=256,
|
||||||
),
|
),
|
||||||
migrations.RunPython(set_filename)
|
),
|
||||||
|
migrations.RunPython(set_filename),
|
||||||
]
|
]
|
||||||
|
@@ -6,7 +6,7 @@ import django.db.models.deletion


 def logs_set_default_group(apps, schema_editor):
-    Log = apps.get_model('documents', 'Log')
+    Log = apps.get_model("documents", "Log")
     for log in Log.objects.all():
         if log.group is None:
             log.group = uuid.uuid4()
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '0023_document_current_filename'),
|
("documents", "0023_document_current_filename"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='archive_serial_number',
|
name="archive_serial_number",
|
||||||
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True),
|
field=models.IntegerField(
|
||||||
|
blank=True,
|
||||||
|
db_index=True,
|
||||||
|
help_text="The position of this document in your physical document archive.",
|
||||||
|
null=True,
|
||||||
|
unique=True,
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='is_inbox_tag',
|
name="is_inbox_tag",
|
||||||
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.'),
|
field=models.BooleanField(
|
||||||
|
default=False,
|
||||||
|
help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
name='DocumentType',
|
name="DocumentType",
|
||||||
fields=[
|
fields=[
|
||||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
(
|
||||||
('name', models.CharField(max_length=128, unique=True)),
|
"id",
|
||||||
('slug', models.SlugField(blank=True, editable=False)),
|
models.AutoField(
|
||||||
('match', models.CharField(blank=True, max_length=256)),
|
auto_created=True,
|
||||||
('matching_algorithm', models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.')),
|
primary_key=True,
|
||||||
('is_insensitive', models.BooleanField(default=True)),
|
serialize=False,
|
||||||
|
verbose_name="ID",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("name", models.CharField(max_length=128, unique=True)),
|
||||||
|
("slug", models.SlugField(blank=True, editable=False)),
|
||||||
|
("match", models.CharField(blank=True, max_length=256)),
|
||||||
|
(
|
||||||
|
"matching_algorithm",
|
||||||
|
models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "Any"),
|
||||||
|
(2, "All"),
|
||||||
|
(3, "Literal"),
|
||||||
|
(4, "Regular Expression"),
|
||||||
|
(5, "Fuzzy Match"),
|
||||||
|
(6, "Automatic Classification"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("is_insensitive", models.BooleanField(default=True)),
|
||||||
],
|
],
|
||||||
options={
|
options={
|
||||||
'abstract': False,
|
"abstract": False,
|
||||||
'ordering': ('name',),
|
"ordering": ("name",),
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='document_type',
|
name="document_type",
|
||||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.documenttype'),
|
field=models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.SET_NULL,
|
||||||
|
related_name="documents",
|
||||||
|
to="documents.documenttype",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='correspondent',
|
model_name="correspondent",
|
||||||
name='matching_algorithm',
|
name="matching_algorithm",
|
||||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "Any"),
|
||||||
|
(2, "All"),
|
||||||
|
(3, "Literal"),
|
||||||
|
(4, "Regular Expression"),
|
||||||
|
(5, "Fuzzy Match"),
|
||||||
|
(6, "Automatic Classification"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='matching_algorithm',
|
name="matching_algorithm",
|
||||||
field=models.PositiveIntegerField(choices=[(1, 'Any'), (2, 'All'), (3, 'Literal'), (4, 'Regular Expression'), (5, 'Fuzzy Match'), (6, 'Automatic Classification')], default=1, help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containg imperfections that foil accurate OCR.'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "Any"),
|
||||||
|
(2, "All"),
|
||||||
|
(3, "Literal"),
|
||||||
|
(4, "Regular Expression"),
|
||||||
|
(5, "Fuzzy Match"),
|
||||||
|
(6, "Automatic Classification"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
help_text='Which algorithm you want to use when matching text to the OCR\'d PDF. Here, "any" looks for any occurrence of any word provided in the PDF, while "all" requires that every word provided appear in the PDF, albeit not in the order provided. A "literal" match means that the text you enter must appear in the PDF exactly as you\'ve entered it, and "regular expression" uses a regex to match the PDF. (If you don\'t know what a regex is, you probably don\'t want this option.) Finally, a "fuzzy match" looks for words or phrases that are mostly—but not exactly—the same, which can be useful for matching against documents containing imperfections that foil accurate OCR.',
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='content',
|
name="content",
|
||||||
field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.'),
|
field=models.TextField(
|
||||||
|
blank=True,
|
||||||
|
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='log',
|
name="log",
|
||||||
options={'ordering': ('-created',)},
|
options={"ordering": ("-created",)},
|
||||||
),
|
),
|
||||||
migrations.RemoveField(
|
migrations.RemoveField(
|
||||||
model_name='log',
|
model_name="log",
|
||||||
name='modified',
|
name="modified",
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='log',
|
model_name="log",
|
||||||
name='group',
|
name="group",
|
||||||
field=models.UUIDField(blank=True, null=True),
|
field=models.UUIDField(blank=True, null=True),
|
||||||
),
|
),
|
||||||
migrations.RunPython(
|
migrations.RunPython(
|
||||||
code=django.db.migrations.operations.special.RunPython.noop,
|
code=django.db.migrations.operations.special.RunPython.noop,
|
||||||
reverse_code=logs_set_default_group
|
reverse_code=logs_set_default_group,
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -7,22 +7,28 @@ from django_q.tasks import schedule
|
|||||||
|
|
||||||
|
|
||||||
def add_schedules(apps, schema_editor):
|
def add_schedules(apps, schema_editor):
|
||||||
schedule('documents.tasks.train_classifier', name="Train the classifier", schedule_type=Schedule.HOURLY)
|
schedule(
|
||||||
schedule('documents.tasks.index_optimize', name="Optimize the index", schedule_type=Schedule.DAILY)
|
"documents.tasks.train_classifier",
|
||||||
|
name="Train the classifier",
|
||||||
|
schedule_type=Schedule.HOURLY,
|
||||||
|
)
|
||||||
|
schedule(
|
||||||
|
"documents.tasks.index_optimize",
|
||||||
|
name="Optimize the index",
|
||||||
|
schedule_type=Schedule.DAILY,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def remove_schedules(apps, schema_editor):
|
def remove_schedules(apps, schema_editor):
|
||||||
Schedule.objects.filter(func='documents.tasks.train_classifier').delete()
|
Schedule.objects.filter(func="documents.tasks.train_classifier").delete()
|
||||||
Schedule.objects.filter(func='documents.tasks.index_optimize').delete()
|
Schedule.objects.filter(func="documents.tasks.index_optimize").delete()
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1000_update_paperless_all'),
|
("documents", "1000_update_paperless_all"),
|
||||||
('django_q', '0013_task_attempt_count'),
|
("django_q", "0013_task_attempt_count"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [RunPython(add_schedules, remove_schedules)]
|
||||||
RunPython(add_schedules, remove_schedules)
|
|
||||||
]
|
|
||||||
|
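The migration above registers periodic tasks through django_q's schedule() helper and deletes them again by function path on reverse. A hedged sketch of the same pattern; the task path "myapp.tasks.cleanup" is a placeholder, not a task in this codebase:

from django_q.models import Schedule
from django_q.tasks import schedule


def add_schedules(apps, schema_editor):
    # schedule_type controls the recurrence (HOURLY, DAILY, WEEKLY, ...).
    schedule(
        "myapp.tasks.cleanup",
        name="Clean up old data",
        schedule_type=Schedule.DAILY,
    )


def remove_schedules(apps, schema_editor):
    # Filtering on the stored function path makes the operation reversible.
    Schedule.objects.filter(func="myapp.tasks.cleanup").delete()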
@ -6,13 +6,19 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1001_auto_20201109_1636'),
|
("documents", "1001_auto_20201109_1636"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='filename',
|
name="filename",
|
||||||
field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True),
|
field=models.FilePathField(
|
||||||
|
default=None,
|
||||||
|
editable=False,
|
||||||
|
help_text="Current filename in storage",
|
||||||
|
max_length=1024,
|
||||||
|
null=True,
|
||||||
|
),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -20,10 +20,7 @@ def source_path(self):
|
|||||||
if self.storage_type == STORAGE_TYPE_GPG:
|
if self.storage_type == STORAGE_TYPE_GPG:
|
||||||
fname += ".gpg"
|
fname += ".gpg"
|
||||||
|
|
||||||
return os.path.join(
|
return os.path.join(settings.ORIGINALS_DIR, fname)
|
||||||
settings.ORIGINALS_DIR,
|
|
||||||
fname
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def add_mime_types(apps, schema_editor):
|
def add_mime_types(apps, schema_editor):
|
||||||
@ -49,43 +46,51 @@ def add_file_extensions(apps, schema_editor):
|
|||||||
documents = Document.objects.all()
|
documents = Document.objects.all()
|
||||||
|
|
||||||
for d in documents:
|
for d in documents:
|
||||||
d.file_type = os.path.splitext(d.filename)[1].strip('.')
|
d.file_type = os.path.splitext(d.filename)[1].strip(".")
|
||||||
d.save()
|
d.save()
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1002_auto_20201111_1105'),
|
("documents", "1002_auto_20201111_1105"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='mime_type',
|
name="mime_type",
|
||||||
field=models.CharField(default="-", editable=False, max_length=256),
|
field=models.CharField(default="-", editable=False, max_length=256),
|
||||||
preserve_default=False,
|
preserve_default=False,
|
||||||
),
|
),
|
||||||
migrations.RunPython(add_mime_types, migrations.RunPython.noop),
|
migrations.RunPython(add_mime_types, migrations.RunPython.noop),
|
||||||
|
|
||||||
# This operation is here so that we can revert the entire migration:
|
# This operation is here so that we can revert the entire migration:
|
||||||
# By allowing this field to be blank and null, we can revert the
|
# By allowing this field to be blank and null, we can revert the
|
||||||
# remove operation further down and the database won't complain about
|
# remove operation further down and the database won't complain about
|
||||||
# NOT NULL violations.
|
# NOT NULL violations.
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='file_type',
|
name="file_type",
|
||||||
field=models.CharField(
|
field=models.CharField(
|
||||||
choices=[('pdf', 'PDF'), ('png', 'PNG'), ('jpg', 'JPG'), ('gif', 'GIF'), ('tiff', 'TIFF'), ('txt', 'TXT'), ('csv', 'CSV'), ('md', 'MD')],
|
choices=[
|
||||||
|
("pdf", "PDF"),
|
||||||
|
("png", "PNG"),
|
||||||
|
("jpg", "JPG"),
|
||||||
|
("gif", "GIF"),
|
||||||
|
("tiff", "TIFF"),
|
||||||
|
("txt", "TXT"),
|
||||||
|
("csv", "CSV"),
|
||||||
|
("md", "MD"),
|
||||||
|
],
|
||||||
editable=False,
|
editable=False,
|
||||||
max_length=4,
|
max_length=4,
|
||||||
null=True,
|
null=True,
|
||||||
blank=True
|
blank=True,
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
migrations.RunPython(migrations.RunPython.noop, add_file_extensions),
|
migrations.RunPython(migrations.RunPython.noop, add_file_extensions),
|
||||||
migrations.RemoveField(
|
migrations.RemoveField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='file_type',
|
name="file_type",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
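The comment in the hunk above explains why file_type is first altered to allow null and blank values before being removed: it keeps the whole migration reversible. A condensed sketch of that pattern, using a hypothetical field name:

from django.db import migrations, models

operations = [
    # Relax the column first, so reversing the RemoveField below can
    # re-create it without hitting NOT NULL violations on existing rows.
    migrations.AlterField(
        model_name="document",
        name="legacy_field",  # hypothetical field name
        field=models.CharField(max_length=4, null=True, blank=True, editable=False),
    ),
    migrations.RemoveField(
        model_name="document",
        name="legacy_field",
    ),
]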
@ -7,20 +7,22 @@ from django_q.tasks import schedule
|
|||||||
|
|
||||||
|
|
||||||
def add_schedules(apps, schema_editor):
|
def add_schedules(apps, schema_editor):
|
||||||
schedule('documents.tasks.sanity_check', name="Perform sanity check", schedule_type=Schedule.WEEKLY)
|
schedule(
|
||||||
|
"documents.tasks.sanity_check",
|
||||||
|
name="Perform sanity check",
|
||||||
|
schedule_type=Schedule.WEEKLY,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def remove_schedules(apps, schema_editor):
|
def remove_schedules(apps, schema_editor):
|
||||||
Schedule.objects.filter(func='documents.tasks.sanity_check').delete()
|
Schedule.objects.filter(func="documents.tasks.sanity_check").delete()
|
||||||
|
|
||||||
|
|
||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1003_mime_types'),
|
("documents", "1003_mime_types"),
|
||||||
('django_q', '0013_task_attempt_count'),
|
("django_q", "0013_task_attempt_count"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [RunPython(add_schedules, remove_schedules)]
|
||||||
RunPython(add_schedules, remove_schedules)
|
|
||||||
]
|
|
||||||
|
@ -6,18 +6,29 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1004_sanity_check_schedule'),
|
("documents", "1004_sanity_check_schedule"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='archive_checksum',
|
name="archive_checksum",
|
||||||
field=models.CharField(blank=True, editable=False, help_text='The checksum of the archived document.', max_length=32, null=True),
|
field=models.CharField(
|
||||||
|
blank=True,
|
||||||
|
editable=False,
|
||||||
|
help_text="The checksum of the archived document.",
|
||||||
|
max_length=32,
|
||||||
|
null=True,
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='checksum',
|
name="checksum",
|
||||||
field=models.CharField(editable=False, help_text='The checksum of the original document.', max_length=32, unique=True),
|
field=models.CharField(
|
||||||
|
editable=False,
|
||||||
|
help_text="The checksum of the original document.",
|
||||||
|
max_length=32,
|
||||||
|
unique=True,
|
||||||
|
),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
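The checksum fields added above are 32 characters long, which matches the hex digest of MD5; later in this change set the archive checksum is computed exactly that way. A small illustrative snippet (the path is a placeholder):

import hashlib

with open("/path/to/archive.pdf", "rb") as f:
    archive_checksum = hashlib.md5(f.read()).hexdigest()  # 32-character hex string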
@ -6,20 +6,20 @@ from django.db import migrations
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1005_checksums'),
|
("documents", "1005_checksums"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.RemoveField(
|
migrations.RemoveField(
|
||||||
model_name='correspondent',
|
model_name="correspondent",
|
||||||
name='slug',
|
name="slug",
|
||||||
),
|
),
|
||||||
migrations.RemoveField(
|
migrations.RemoveField(
|
||||||
model_name='documenttype',
|
model_name="documenttype",
|
||||||
name='slug',
|
name="slug",
|
||||||
),
|
),
|
||||||
migrations.RemoveField(
|
migrations.RemoveField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='slug',
|
name="slug",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -9,29 +9,82 @@ class Migration(migrations.Migration):
|
|||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||||
('documents', '1006_auto_20201208_2209'),
|
("documents", "1006_auto_20201208_2209"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
name='SavedView',
|
name="SavedView",
|
||||||
fields=[
|
fields=[
|
||||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
(
|
||||||
('name', models.CharField(max_length=128)),
|
"id",
|
||||||
('show_on_dashboard', models.BooleanField()),
|
models.AutoField(
|
||||||
('show_in_sidebar', models.BooleanField()),
|
auto_created=True,
|
||||||
('sort_field', models.CharField(max_length=128)),
|
primary_key=True,
|
||||||
('sort_reverse', models.BooleanField(default=False)),
|
serialize=False,
|
||||||
('user', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL)),
|
verbose_name="ID",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("name", models.CharField(max_length=128)),
|
||||||
|
("show_on_dashboard", models.BooleanField()),
|
||||||
|
("show_in_sidebar", models.BooleanField()),
|
||||||
|
("sort_field", models.CharField(max_length=128)),
|
||||||
|
("sort_reverse", models.BooleanField(default=False)),
|
||||||
|
(
|
||||||
|
"user",
|
||||||
|
models.ForeignKey(
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
to=settings.AUTH_USER_MODEL,
|
||||||
|
),
|
||||||
|
),
|
||||||
],
|
],
|
||||||
),
|
),
|
||||||
migrations.CreateModel(
|
migrations.CreateModel(
|
||||||
name='SavedViewFilterRule',
|
name="SavedViewFilterRule",
|
||||||
fields=[
|
fields=[
|
||||||
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
(
|
||||||
('rule_type', models.PositiveIntegerField(choices=[(0, 'Title contains'), (1, 'Content contains'), (2, 'ASN is'), (3, 'Correspondent is'), (4, 'Document type is'), (5, 'Is in inbox'), (6, 'Has tag'), (7, 'Has any tag'), (8, 'Created before'), (9, 'Created after'), (10, 'Created year is'), (11, 'Created month is'), (12, 'Created day is'), (13, 'Added before'), (14, 'Added after'), (15, 'Modified before'), (16, 'Modified after'), (17, 'Does not have tag')])),
|
"id",
|
||||||
('value', models.CharField(max_length=128)),
|
models.AutoField(
|
||||||
('saved_view', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='filter_rules', to='documents.savedview')),
|
auto_created=True,
|
||||||
|
primary_key=True,
|
||||||
|
serialize=False,
|
||||||
|
verbose_name="ID",
|
||||||
|
),
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"rule_type",
|
||||||
|
models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(0, "Title contains"),
|
||||||
|
(1, "Content contains"),
|
||||||
|
(2, "ASN is"),
|
||||||
|
(3, "Correspondent is"),
|
||||||
|
(4, "Document type is"),
|
||||||
|
(5, "Is in inbox"),
|
||||||
|
(6, "Has tag"),
|
||||||
|
(7, "Has any tag"),
|
||||||
|
(8, "Created before"),
|
||||||
|
(9, "Created after"),
|
||||||
|
(10, "Created year is"),
|
||||||
|
(11, "Created month is"),
|
||||||
|
(12, "Created day is"),
|
||||||
|
(13, "Added before"),
|
||||||
|
(14, "Added after"),
|
||||||
|
(15, "Modified before"),
|
||||||
|
(16, "Modified after"),
|
||||||
|
(17, "Does not have tag"),
|
||||||
|
]
|
||||||
|
),
|
||||||
|
),
|
||||||
|
("value", models.CharField(max_length=128)),
|
||||||
|
(
|
||||||
|
"saved_view",
|
||||||
|
models.ForeignKey(
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
related_name="filter_rules",
|
||||||
|
to="documents.savedview",
|
||||||
|
),
|
||||||
|
),
|
||||||
],
|
],
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
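The CreateModel fields above show the rule black applies throughout this commit: a call stays on one line while it fits within the configured line length (88 characters by default) and is otherwise exploded to one argument per line with a trailing comma. An illustration, not taken from the repository:

from django.db import models

# Short enough, so black keeps it collapsed on a single line:
name = models.CharField(max_length=128, unique=True, verbose_name="name")

# Too long for one line, so black spreads the arguments out, as in the
# migrations above:
matching_algorithm = models.PositiveIntegerField(
    choices=[
        (1, "Any word"),
        (2, "All words"),
    ],
    default=1,
    verbose_name="matching algorithm",
)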
@ -7,28 +7,28 @@ import django.db.models.functions.text
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1007_savedview_savedviewfilterrule'),
|
("documents", "1007_savedview_savedviewfilterrule"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='correspondent',
|
name="correspondent",
|
||||||
options={'ordering': (django.db.models.functions.text.Lower('name'),)},
|
options={"ordering": (django.db.models.functions.text.Lower("name"),)},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='document',
|
name="document",
|
||||||
options={'ordering': ('-created',)},
|
options={"ordering": ("-created",)},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='documenttype',
|
name="documenttype",
|
||||||
options={'ordering': (django.db.models.functions.text.Lower('name'),)},
|
options={"ordering": (django.db.models.functions.text.Lower("name"),)},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='savedview',
|
name="savedview",
|
||||||
options={'ordering': (django.db.models.functions.text.Lower('name'),)},
|
options={"ordering": (django.db.models.functions.text.Lower("name"),)},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='tag',
|
name="tag",
|
||||||
options={'ordering': (django.db.models.functions.text.Lower('name'),)},
|
options={"ordering": (django.db.models.functions.text.Lower("name"),)},
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -6,24 +6,24 @@ from django.db import migrations
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1008_auto_20201216_1736'),
|
("documents", "1008_auto_20201216_1736"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='correspondent',
|
name="correspondent",
|
||||||
options={'ordering': ('name',)},
|
options={"ordering": ("name",)},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='documenttype',
|
name="documenttype",
|
||||||
options={'ordering': ('name',)},
|
options={"ordering": ("name",)},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='savedview',
|
name="savedview",
|
||||||
options={'ordering': ('name',)},
|
options={"ordering": ("name",)},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='tag',
|
name="tag",
|
||||||
options={'ordering': ('name',)},
|
options={"ordering": ("name",)},
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -6,13 +6,13 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1009_auto_20201216_2005'),
|
("documents", "1009_auto_20201216_2005"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedviewfilterrule',
|
model_name="savedviewfilterrule",
|
||||||
name='value',
|
name="value",
|
||||||
field=models.CharField(blank=True, max_length=128, null=True),
|
field=models.CharField(blank=True, max_length=128, null=True),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -10,241 +10,433 @@ class Migration(migrations.Migration):
|
|||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
|
||||||
('documents', '1010_auto_20210101_2159'),
|
("documents", "1010_auto_20210101_2159"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='correspondent',
|
name="correspondent",
|
||||||
options={'ordering': ('name',), 'verbose_name': 'correspondent', 'verbose_name_plural': 'correspondents'},
|
options={
|
||||||
|
"ordering": ("name",),
|
||||||
|
"verbose_name": "correspondent",
|
||||||
|
"verbose_name_plural": "correspondents",
|
||||||
|
},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='document',
|
name="document",
|
||||||
options={'ordering': ('-created',), 'verbose_name': 'document', 'verbose_name_plural': 'documents'},
|
options={
|
||||||
|
"ordering": ("-created",),
|
||||||
|
"verbose_name": "document",
|
||||||
|
"verbose_name_plural": "documents",
|
||||||
|
},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='documenttype',
|
name="documenttype",
|
||||||
options={'verbose_name': 'document type', 'verbose_name_plural': 'document types'},
|
options={
|
||||||
|
"verbose_name": "document type",
|
||||||
|
"verbose_name_plural": "document types",
|
||||||
|
},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='log',
|
name="log",
|
||||||
options={'ordering': ('-created',), 'verbose_name': 'log', 'verbose_name_plural': 'logs'},
|
options={
|
||||||
|
"ordering": ("-created",),
|
||||||
|
"verbose_name": "log",
|
||||||
|
"verbose_name_plural": "logs",
|
||||||
|
},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='savedview',
|
name="savedview",
|
||||||
options={'ordering': ('name',), 'verbose_name': 'saved view', 'verbose_name_plural': 'saved views'},
|
options={
|
||||||
|
"ordering": ("name",),
|
||||||
|
"verbose_name": "saved view",
|
||||||
|
"verbose_name_plural": "saved views",
|
||||||
|
},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='savedviewfilterrule',
|
name="savedviewfilterrule",
|
||||||
options={'verbose_name': 'filter rule', 'verbose_name_plural': 'filter rules'},
|
options={
|
||||||
|
"verbose_name": "filter rule",
|
||||||
|
"verbose_name_plural": "filter rules",
|
||||||
|
},
|
||||||
),
|
),
|
||||||
migrations.AlterModelOptions(
|
migrations.AlterModelOptions(
|
||||||
name='tag',
|
name="tag",
|
||||||
options={'verbose_name': 'tag', 'verbose_name_plural': 'tags'},
|
options={"verbose_name": "tag", "verbose_name_plural": "tags"},
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='correspondent',
|
model_name="correspondent",
|
||||||
name='is_insensitive',
|
name="is_insensitive",
|
||||||
field=models.BooleanField(default=True, verbose_name='is insensitive'),
|
field=models.BooleanField(default=True, verbose_name="is insensitive"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='correspondent',
|
model_name="correspondent",
|
||||||
name='match',
|
name="match",
|
||||||
field=models.CharField(blank=True, max_length=256, verbose_name='match'),
|
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='correspondent',
|
model_name="correspondent",
|
||||||
name='matching_algorithm',
|
name="matching_algorithm",
|
||||||
field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "Any word"),
|
||||||
|
(2, "All words"),
|
||||||
|
(3, "Exact match"),
|
||||||
|
(4, "Regular expression"),
|
||||||
|
(5, "Fuzzy word"),
|
||||||
|
(6, "Automatic"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
verbose_name="matching algorithm",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='correspondent',
|
model_name="correspondent",
|
||||||
name='name',
|
name="name",
|
||||||
field=models.CharField(max_length=128, unique=True, verbose_name='name'),
|
field=models.CharField(max_length=128, unique=True, verbose_name="name"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='added',
|
name="added",
|
||||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, editable=False, verbose_name='added'),
|
field=models.DateTimeField(
|
||||||
|
db_index=True,
|
||||||
|
default=django.utils.timezone.now,
|
||||||
|
editable=False,
|
||||||
|
verbose_name="added",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='archive_checksum',
|
name="archive_checksum",
|
||||||
field=models.CharField(blank=True, editable=False, help_text='The checksum of the archived document.', max_length=32, null=True, verbose_name='archive checksum'),
|
field=models.CharField(
|
||||||
|
blank=True,
|
||||||
|
editable=False,
|
||||||
|
help_text="The checksum of the archived document.",
|
||||||
|
max_length=32,
|
||||||
|
null=True,
|
||||||
|
verbose_name="archive checksum",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='archive_serial_number',
|
name="archive_serial_number",
|
||||||
field=models.IntegerField(blank=True, db_index=True, help_text='The position of this document in your physical document archive.', null=True, unique=True, verbose_name='archive serial number'),
|
field=models.IntegerField(
|
||||||
|
blank=True,
|
||||||
|
db_index=True,
|
||||||
|
help_text="The position of this document in your physical document archive.",
|
||||||
|
null=True,
|
||||||
|
unique=True,
|
||||||
|
verbose_name="archive serial number",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='checksum',
|
name="checksum",
|
||||||
field=models.CharField(editable=False, help_text='The checksum of the original document.', max_length=32, unique=True, verbose_name='checksum'),
|
field=models.CharField(
|
||||||
|
editable=False,
|
||||||
|
help_text="The checksum of the original document.",
|
||||||
|
max_length=32,
|
||||||
|
unique=True,
|
||||||
|
verbose_name="checksum",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='content',
|
name="content",
|
||||||
field=models.TextField(blank=True, help_text='The raw, text-only data of the document. This field is primarily used for searching.', verbose_name='content'),
|
field=models.TextField(
|
||||||
|
blank=True,
|
||||||
|
help_text="The raw, text-only data of the document. This field is primarily used for searching.",
|
||||||
|
verbose_name="content",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='correspondent',
|
name="correspondent",
|
||||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.correspondent', verbose_name='correspondent'),
|
field=models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.SET_NULL,
|
||||||
|
related_name="documents",
|
||||||
|
to="documents.correspondent",
|
||||||
|
verbose_name="correspondent",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='created',
|
name="created",
|
||||||
field=models.DateTimeField(db_index=True, default=django.utils.timezone.now, verbose_name='created'),
|
field=models.DateTimeField(
|
||||||
|
db_index=True, default=django.utils.timezone.now, verbose_name="created"
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='document_type',
|
name="document_type",
|
||||||
field=models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='documents', to='documents.documenttype', verbose_name='document type'),
|
field=models.ForeignKey(
|
||||||
|
blank=True,
|
||||||
|
null=True,
|
||||||
|
on_delete=django.db.models.deletion.SET_NULL,
|
||||||
|
related_name="documents",
|
||||||
|
to="documents.documenttype",
|
||||||
|
verbose_name="document type",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='filename',
|
name="filename",
|
||||||
field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, verbose_name='filename'),
|
field=models.FilePathField(
|
||||||
|
default=None,
|
||||||
|
editable=False,
|
||||||
|
help_text="Current filename in storage",
|
||||||
|
max_length=1024,
|
||||||
|
null=True,
|
||||||
|
verbose_name="filename",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='mime_type',
|
name="mime_type",
|
||||||
field=models.CharField(editable=False, max_length=256, verbose_name='mime type'),
|
field=models.CharField(
|
||||||
|
editable=False, max_length=256, verbose_name="mime type"
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='modified',
|
name="modified",
|
||||||
field=models.DateTimeField(auto_now=True, db_index=True, verbose_name='modified'),
|
field=models.DateTimeField(
|
||||||
|
auto_now=True, db_index=True, verbose_name="modified"
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='storage_type',
|
name="storage_type",
|
||||||
field=models.CharField(choices=[('unencrypted', 'Unencrypted'), ('gpg', 'Encrypted with GNU Privacy Guard')], default='unencrypted', editable=False, max_length=11, verbose_name='storage type'),
|
field=models.CharField(
|
||||||
|
choices=[
|
||||||
|
("unencrypted", "Unencrypted"),
|
||||||
|
("gpg", "Encrypted with GNU Privacy Guard"),
|
||||||
|
],
|
||||||
|
default="unencrypted",
|
||||||
|
editable=False,
|
||||||
|
max_length=11,
|
||||||
|
verbose_name="storage type",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='tags',
|
name="tags",
|
||||||
field=models.ManyToManyField(blank=True, related_name='documents', to='documents.Tag', verbose_name='tags'),
|
field=models.ManyToManyField(
|
||||||
|
blank=True,
|
||||||
|
related_name="documents",
|
||||||
|
to="documents.Tag",
|
||||||
|
verbose_name="tags",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='title',
|
name="title",
|
||||||
field=models.CharField(blank=True, db_index=True, max_length=128, verbose_name='title'),
|
field=models.CharField(
|
||||||
|
blank=True, db_index=True, max_length=128, verbose_name="title"
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='documenttype',
|
model_name="documenttype",
|
||||||
name='is_insensitive',
|
name="is_insensitive",
|
||||||
field=models.BooleanField(default=True, verbose_name='is insensitive'),
|
field=models.BooleanField(default=True, verbose_name="is insensitive"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='documenttype',
|
model_name="documenttype",
|
||||||
name='match',
|
name="match",
|
||||||
field=models.CharField(blank=True, max_length=256, verbose_name='match'),
|
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='documenttype',
|
model_name="documenttype",
|
||||||
name='matching_algorithm',
|
name="matching_algorithm",
|
||||||
field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "Any word"),
|
||||||
|
(2, "All words"),
|
||||||
|
(3, "Exact match"),
|
||||||
|
(4, "Regular expression"),
|
||||||
|
(5, "Fuzzy word"),
|
||||||
|
(6, "Automatic"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
verbose_name="matching algorithm",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='documenttype',
|
model_name="documenttype",
|
||||||
name='name',
|
name="name",
|
||||||
field=models.CharField(max_length=128, unique=True, verbose_name='name'),
|
field=models.CharField(max_length=128, unique=True, verbose_name="name"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='log',
|
model_name="log",
|
||||||
name='created',
|
name="created",
|
||||||
field=models.DateTimeField(auto_now_add=True, verbose_name='created'),
|
field=models.DateTimeField(auto_now_add=True, verbose_name="created"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='log',
|
model_name="log",
|
||||||
name='group',
|
name="group",
|
||||||
field=models.UUIDField(blank=True, null=True, verbose_name='group'),
|
field=models.UUIDField(blank=True, null=True, verbose_name="group"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='log',
|
model_name="log",
|
||||||
name='level',
|
name="level",
|
||||||
field=models.PositiveIntegerField(choices=[(10, 'debug'), (20, 'information'), (30, 'warning'), (40, 'error'), (50, 'critical')], default=20, verbose_name='level'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(10, "debug"),
|
||||||
|
(20, "information"),
|
||||||
|
(30, "warning"),
|
||||||
|
(40, "error"),
|
||||||
|
(50, "critical"),
|
||||||
|
],
|
||||||
|
default=20,
|
||||||
|
verbose_name="level",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='log',
|
model_name="log",
|
||||||
name='message',
|
name="message",
|
||||||
field=models.TextField(verbose_name='message'),
|
field=models.TextField(verbose_name="message"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedview',
|
model_name="savedview",
|
||||||
name='name',
|
name="name",
|
||||||
field=models.CharField(max_length=128, verbose_name='name'),
|
field=models.CharField(max_length=128, verbose_name="name"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedview',
|
model_name="savedview",
|
||||||
name='show_in_sidebar',
|
name="show_in_sidebar",
|
||||||
field=models.BooleanField(verbose_name='show in sidebar'),
|
field=models.BooleanField(verbose_name="show in sidebar"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedview',
|
model_name="savedview",
|
||||||
name='show_on_dashboard',
|
name="show_on_dashboard",
|
||||||
field=models.BooleanField(verbose_name='show on dashboard'),
|
field=models.BooleanField(verbose_name="show on dashboard"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedview',
|
model_name="savedview",
|
||||||
name='sort_field',
|
name="sort_field",
|
||||||
field=models.CharField(max_length=128, verbose_name='sort field'),
|
field=models.CharField(max_length=128, verbose_name="sort field"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedview',
|
model_name="savedview",
|
||||||
name='sort_reverse',
|
name="sort_reverse",
|
||||||
field=models.BooleanField(default=False, verbose_name='sort reverse'),
|
field=models.BooleanField(default=False, verbose_name="sort reverse"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedview',
|
model_name="savedview",
|
||||||
name='user',
|
name="user",
|
||||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to=settings.AUTH_USER_MODEL, verbose_name='user'),
|
field=models.ForeignKey(
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
to=settings.AUTH_USER_MODEL,
|
||||||
|
verbose_name="user",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedviewfilterrule',
|
model_name="savedviewfilterrule",
|
||||||
name='rule_type',
|
name="rule_type",
|
||||||
field=models.PositiveIntegerField(choices=[(0, 'title contains'), (1, 'content contains'), (2, 'ASN is'), (3, 'correspondent is'), (4, 'document type is'), (5, 'is in inbox'), (6, 'has tag'), (7, 'has any tag'), (8, 'created before'), (9, 'created after'), (10, 'created year is'), (11, 'created month is'), (12, 'created day is'), (13, 'added before'), (14, 'added after'), (15, 'modified before'), (16, 'modified after'), (17, 'does not have tag')], verbose_name='rule type'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(0, "title contains"),
|
||||||
|
(1, "content contains"),
|
||||||
|
(2, "ASN is"),
|
||||||
|
(3, "correspondent is"),
|
||||||
|
(4, "document type is"),
|
||||||
|
(5, "is in inbox"),
|
||||||
|
(6, "has tag"),
|
||||||
|
(7, "has any tag"),
|
||||||
|
(8, "created before"),
|
||||||
|
(9, "created after"),
|
||||||
|
(10, "created year is"),
|
||||||
|
(11, "created month is"),
|
||||||
|
(12, "created day is"),
|
||||||
|
(13, "added before"),
|
||||||
|
(14, "added after"),
|
||||||
|
(15, "modified before"),
|
||||||
|
(16, "modified after"),
|
||||||
|
(17, "does not have tag"),
|
||||||
|
],
|
||||||
|
verbose_name="rule type",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedviewfilterrule',
|
model_name="savedviewfilterrule",
|
||||||
name='saved_view',
|
name="saved_view",
|
||||||
field=models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='filter_rules', to='documents.savedview', verbose_name='saved view'),
|
field=models.ForeignKey(
|
||||||
|
on_delete=django.db.models.deletion.CASCADE,
|
||||||
|
related_name="filter_rules",
|
||||||
|
to="documents.savedview",
|
||||||
|
verbose_name="saved view",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedviewfilterrule',
|
model_name="savedviewfilterrule",
|
||||||
name='value',
|
name="value",
|
||||||
field=models.CharField(blank=True, max_length=128, null=True, verbose_name='value'),
|
field=models.CharField(
|
||||||
|
blank=True, max_length=128, null=True, verbose_name="value"
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='colour',
|
name="colour",
|
||||||
field=models.PositiveIntegerField(choices=[(1, '#a6cee3'), (2, '#1f78b4'), (3, '#b2df8a'), (4, '#33a02c'), (5, '#fb9a99'), (6, '#e31a1c'), (7, '#fdbf6f'), (8, '#ff7f00'), (9, '#cab2d6'), (10, '#6a3d9a'), (11, '#b15928'), (12, '#000000'), (13, '#cccccc')], default=1, verbose_name='color'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "#a6cee3"),
|
||||||
|
(2, "#1f78b4"),
|
||||||
|
(3, "#b2df8a"),
|
||||||
|
(4, "#33a02c"),
|
||||||
|
(5, "#fb9a99"),
|
||||||
|
(6, "#e31a1c"),
|
||||||
|
(7, "#fdbf6f"),
|
||||||
|
(8, "#ff7f00"),
|
||||||
|
(9, "#cab2d6"),
|
||||||
|
(10, "#6a3d9a"),
|
||||||
|
(11, "#b15928"),
|
||||||
|
(12, "#000000"),
|
||||||
|
(13, "#cccccc"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
verbose_name="color",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='is_inbox_tag',
|
name="is_inbox_tag",
|
||||||
field=models.BooleanField(default=False, help_text='Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.', verbose_name='is inbox tag'),
|
field=models.BooleanField(
|
||||||
|
default=False,
|
||||||
|
help_text="Marks this tag as an inbox tag: All newly consumed documents will be tagged with inbox tags.",
|
||||||
|
verbose_name="is inbox tag",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='is_insensitive',
|
name="is_insensitive",
|
||||||
field=models.BooleanField(default=True, verbose_name='is insensitive'),
|
field=models.BooleanField(default=True, verbose_name="is insensitive"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='match',
|
name="match",
|
||||||
field=models.CharField(blank=True, max_length=256, verbose_name='match'),
|
field=models.CharField(blank=True, max_length=256, verbose_name="match"),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='matching_algorithm',
|
name="matching_algorithm",
|
||||||
field=models.PositiveIntegerField(choices=[(1, 'Any word'), (2, 'All words'), (3, 'Exact match'), (4, 'Regular expression'), (5, 'Fuzzy word'), (6, 'Automatic')], default=1, verbose_name='matching algorithm'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(1, "Any word"),
|
||||||
|
(2, "All words"),
|
||||||
|
(3, "Exact match"),
|
||||||
|
(4, "Regular expression"),
|
||||||
|
(5, "Fuzzy word"),
|
||||||
|
(6, "Automatic"),
|
||||||
|
],
|
||||||
|
default=1,
|
||||||
|
verbose_name="matching algorithm",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='name',
|
name="name",
|
||||||
field=models.CharField(max_length=128, unique=True, verbose_name='name'),
|
field=models.CharField(max_length=128, unique=True, verbose_name="name"),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
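The long migration above mostly adds verbose_name labels to existing fields. A brief sketch of what those labels do in Django: they become the human-readable names shown in the admin and in model forms (the model and field below are illustrative, not from this repository):

from django.db import models


class Receipt(models.Model):
    title = models.CharField(max_length=128, verbose_name="title")

    class Meta:
        verbose_name = "receipt"
        verbose_name_plural = "receipts"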
@ -20,6 +20,7 @@ logger = logging.getLogger("paperless.migrations")
|
|||||||
# This is code copied straight from paperless before the change.
|
# This is code copied straight from paperless before the change.
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
def archive_name_from_filename(filename):
|
def archive_name_from_filename(filename):
|
||||||
return os.path.splitext(filename)[0] + ".pdf"
|
return os.path.splitext(filename)[0] + ".pdf"
|
||||||
|
|
||||||
@ -30,10 +31,7 @@ def archive_path_old(doc):
|
|||||||
else:
|
else:
|
||||||
fname = "{:07}.pdf".format(doc.pk)
|
fname = "{:07}.pdf".format(doc.pk)
|
||||||
|
|
||||||
return os.path.join(
|
return os.path.join(settings.ARCHIVE_DIR, fname)
|
||||||
settings.ARCHIVE_DIR,
|
|
||||||
fname
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
STORAGE_TYPE_GPG = "gpg"
|
STORAGE_TYPE_GPG = "gpg"
|
||||||
@ -41,10 +39,7 @@ STORAGE_TYPE_GPG = "gpg"
|
|||||||
|
|
||||||
def archive_path_new(doc):
|
def archive_path_new(doc):
|
||||||
if doc.archive_filename is not None:
|
if doc.archive_filename is not None:
|
||||||
return os.path.join(
|
return os.path.join(settings.ARCHIVE_DIR, str(doc.archive_filename))
|
||||||
settings.ARCHIVE_DIR,
|
|
||||||
str(doc.archive_filename)
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -57,10 +52,7 @@ def source_path(doc):
|
|||||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||||
fname += ".gpg" # pragma: no cover
|
fname += ".gpg" # pragma: no cover
|
||||||
|
|
||||||
return os.path.join(
|
return os.path.join(settings.ORIGINALS_DIR, fname)
|
||||||
settings.ORIGINALS_DIR,
|
|
||||||
fname
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def generate_unique_filename(doc, archive_filename=False):
|
def generate_unique_filename(doc, archive_filename=False):
|
||||||
@ -75,7 +67,8 @@ def generate_unique_filename(doc, archive_filename=False):
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
new_filename = generate_filename(
|
new_filename = generate_filename(
|
||||||
doc, counter, archive_filename=archive_filename)
|
doc, counter, archive_filename=archive_filename
|
||||||
|
)
|
||||||
if new_filename == old_filename:
|
if new_filename == old_filename:
|
||||||
# still the same as before.
|
# still the same as before.
|
||||||
return new_filename
|
return new_filename
|
||||||
@ -91,14 +84,11 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
if settings.PAPERLESS_FILENAME_FORMAT is not None:
|
if settings.PAPERLESS_FILENAME_FORMAT is not None:
|
||||||
tags = defaultdictNoStr(lambda: slugify(None),
|
tags = defaultdictNoStr(lambda: slugify(None), many_to_dictionary(doc.tags))
|
||||||
many_to_dictionary(doc.tags))
|
|
||||||
|
|
||||||
tag_list = pathvalidate.sanitize_filename(
|
tag_list = pathvalidate.sanitize_filename(
|
||||||
",".join(sorted(
|
",".join(sorted([tag.name for tag in doc.tags.all()])),
|
||||||
[tag.name for tag in doc.tags.all()]
|
replacement_text="-",
|
||||||
)),
|
|
||||||
replacement_text="-"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if doc.correspondent:
|
if doc.correspondent:
|
||||||
@ -116,20 +106,21 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
|||||||
document_type = "none"
|
document_type = "none"
|
||||||
|
|
||||||
path = settings.PAPERLESS_FILENAME_FORMAT.format(
|
path = settings.PAPERLESS_FILENAME_FORMAT.format(
|
||||||
title=pathvalidate.sanitize_filename(
|
title=pathvalidate.sanitize_filename(doc.title, replacement_text="-"),
|
||||||
doc.title, replacement_text="-"),
|
|
||||||
correspondent=correspondent,
|
correspondent=correspondent,
|
||||||
document_type=document_type,
|
document_type=document_type,
|
||||||
created=datetime.date.isoformat(doc.created),
|
created=datetime.date.isoformat(doc.created),
|
||||||
created_year=doc.created.year if doc.created else "none",
|
created_year=doc.created.year if doc.created else "none",
|
||||||
created_month=f"{doc.created.month:02}" if doc.created else "none", # NOQA: E501
|
created_month=f"{doc.created.month:02}"
|
||||||
|
if doc.created
|
||||||
|
else "none", # NOQA: E501
|
||||||
created_day=f"{doc.created.day:02}" if doc.created else "none",
|
created_day=f"{doc.created.day:02}" if doc.created else "none",
|
||||||
added=datetime.date.isoformat(doc.added),
|
added=datetime.date.isoformat(doc.added),
|
||||||
added_year=doc.added.year if doc.added else "none",
|
added_year=doc.added.year if doc.added else "none",
|
||||||
added_month=f"{doc.added.month:02}" if doc.added else "none",
|
added_month=f"{doc.added.month:02}" if doc.added else "none",
|
||||||
added_day=f"{doc.added.day:02}" if doc.added else "none",
|
added_day=f"{doc.added.day:02}" if doc.added else "none",
|
||||||
tags=tags,
|
tags=tags,
|
||||||
tag_list=tag_list
|
tag_list=tag_list,
|
||||||
).strip()
|
).strip()
|
||||||
|
|
||||||
path = path.strip(os.sep)
|
path = path.strip(os.sep)
|
||||||
@ -137,7 +128,8 @@ def generate_filename(doc, counter=0, append_gpg=True, archive_filename=False):
|
|||||||
except (ValueError, KeyError, IndexError):
|
except (ValueError, KeyError, IndexError):
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Invalid PAPERLESS_FILENAME_FORMAT: "
|
f"Invalid PAPERLESS_FILENAME_FORMAT: "
|
||||||
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default")
|
f"{settings.PAPERLESS_FILENAME_FORMAT}, falling back to default"
|
||||||
|
)
|
||||||
|
|
||||||
counter_str = f"_{counter:02}" if counter else ""
|
counter_str = f"_{counter:02}" if counter else ""
|
||||||
|
|
||||||
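generate_filename() above fills PAPERLESS_FILENAME_FORMAT with str.format(); unused keyword arguments are simply ignored by the template. A hedged example with made-up values showing how such a template resolves:

PAPERLESS_FILENAME_FORMAT = "{created_year}/{correspondent}/{title}"

path = PAPERLESS_FILENAME_FORMAT.format(
    title="quarterly-report",
    correspondent="acme-corp",
    created_year=2021,
)
print(path)  # -> "2021/acme-corp/quarterly-report"
# If that name already exists, generate_unique_filename() retries with a
# counter and generate_filename() appends the "_{counter:02}" suffix.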
@ -166,29 +158,29 @@ def parse_wrapper(parser, path, mime_type, file_name):

def create_archive_version(doc, retry_count=3):
def create_archive_version(doc, retry_count=3):
from documents.parsers import get_parser_class_for_mime_type, \
from documents.parsers import (
DocumentParser, \
get_parser_class_for_mime_type,
ParseError
DocumentParser,
ParseError,
logger.info(
f"Regenerating archive document for document ID:{doc.id}"
)
)

logger.info(f"Regenerating archive document for document ID:{doc.id}")
parser_class = get_parser_class_for_mime_type(doc.mime_type)
parser_class = get_parser_class_for_mime_type(doc.mime_type)
for try_num in range(retry_count):
for try_num in range(retry_count):
parser: DocumentParser = parser_class(None, None)
parser: DocumentParser = parser_class(None, None)
try:
try:
parse_wrapper(parser, source_path(doc), doc.mime_type,
parse_wrapper(
os.path.basename(doc.filename))
parser, source_path(doc), doc.mime_type, os.path.basename(doc.filename)
)
doc.content = parser.get_text()
doc.content = parser.get_text()

if parser.get_archive_path() and os.path.isfile(
if parser.get_archive_path() and os.path.isfile(parser.get_archive_path()):
parser.get_archive_path()):
doc.archive_filename = generate_unique_filename(
doc.archive_filename = generate_unique_filename(
doc, archive_filename=True)
doc, archive_filename=True
)
with open(parser.get_archive_path(), "rb") as f:
with open(parser.get_archive_path(), "rb") as f:
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
doc.archive_checksum = hashlib.md5(f.read()).hexdigest()
os.makedirs(os.path.dirname(archive_path_new(doc)),
os.makedirs(os.path.dirname(archive_path_new(doc)), exist_ok=True)
exist_ok=True)
shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
shutil.copy2(parser.get_archive_path(), archive_path_new(doc))
else:
else:
doc.archive_checksum = None
doc.archive_checksum = None
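As context for the checksum handling above: the archive checksum is simply the MD5 hex digest of the file contents. A minimal standalone sketch, assuming a hypothetical file path:

    import hashlib

    with open("/tmp/example-archive.pdf", "rb") as f:  # hypothetical path
        archive_checksum = hashlib.md5(f.read()).hexdigest()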
@ -241,8 +233,8 @@ def move_old_to_new_locations(apps, schema_editor):
old_path = archive_path_old(doc)
old_path = archive_path_old(doc)
if doc.id not in affected_document_ids and not os.path.isfile(old_path):
if doc.id not in affected_document_ids and not os.path.isfile(old_path):
raise ValueError(
raise ValueError(
f"Archived document ID:{doc.id} does not exist at: "
f"Archived document ID:{doc.id} does not exist at: " f"{old_path}"
f"{old_path}")
)

# check that we can regenerate affected archive versions
# check that we can regenerate affected archive versions
for doc_id in affected_document_ids:
for doc_id in affected_document_ids:
@ -253,7 +245,8 @@ def move_old_to_new_locations(apps, schema_editor):
if not parser_class:
if not parser_class:
raise ValueError(
raise ValueError(
f"Document ID:{doc.id} has an invalid archived document, "
f"Document ID:{doc.id} has an invalid archived document, "
f"but no parsers are available. Cannot migrate.")
f"but no parsers are available. Cannot migrate."
)

for doc in Document.objects.filter(archive_checksum__isnull=False):
for doc in Document.objects.filter(archive_checksum__isnull=False):

@ -261,9 +254,7 @@ def move_old_to_new_locations(apps, schema_editor):
old_path = archive_path_old(doc)
old_path = archive_path_old(doc)
# remove affected archive versions
# remove affected archive versions
if os.path.isfile(old_path):
if os.path.isfile(old_path):
logger.debug(
logger.debug(f"Removing {old_path}")
f"Removing {old_path}"
)
os.unlink(old_path)
os.unlink(old_path)
else:
else:
# Set archive path for unaffected files
# Set archive path for unaffected files
@ -290,7 +281,8 @@ def move_new_to_old_locations(apps, schema_editor):
raise ValueError(
raise ValueError(
f"Cannot migrate: Archive file name {old_archive_path} of "
f"Cannot migrate: Archive file name {old_archive_path} of "
f"document {doc.filename} would clash with another archive "
f"document {doc.filename} would clash with another archive "
f"filename.")
f"filename."
)
old_archive_paths.add(old_archive_path)
old_archive_paths.add(old_archive_path)
if new_archive_path != old_archive_path and os.path.isfile(old_archive_path):
if new_archive_path != old_archive_path and os.path.isfile(old_archive_path):
raise ValueError(
raise ValueError(
@ -309,22 +301,35 @@ def move_new_to_old_locations(apps, schema_editor):
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1011_auto_20210101_2340'),
|
("documents", "1011_auto_20210101_2340"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='archive_filename',
|
name="archive_filename",
|
||||||
field=models.FilePathField(default=None, editable=False, help_text='Current archive filename in storage', max_length=1024, null=True, unique=True, verbose_name='archive filename'),
|
field=models.FilePathField(
|
||||||
|
default=None,
|
||||||
|
editable=False,
|
||||||
|
help_text="Current archive filename in storage",
|
||||||
|
max_length=1024,
|
||||||
|
null=True,
|
||||||
|
unique=True,
|
||||||
|
verbose_name="archive filename",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='document',
|
model_name="document",
|
||||||
name='filename',
|
name="filename",
|
||||||
field=models.FilePathField(default=None, editable=False, help_text='Current filename in storage', max_length=1024, null=True, unique=True, verbose_name='filename'),
|
field=models.FilePathField(
|
||||||
|
default=None,
|
||||||
|
editable=False,
|
||||||
|
help_text="Current filename in storage",
|
||||||
|
max_length=1024,
|
||||||
|
null=True,
|
||||||
|
unique=True,
|
||||||
|
verbose_name="filename",
|
||||||
),
|
),
|
||||||
migrations.RunPython(
|
|
||||||
move_old_to_new_locations,
|
|
||||||
move_new_to_old_locations
|
|
||||||
),
|
),
|
||||||
|
migrations.RunPython(move_old_to_new_locations, move_new_to_old_locations),
|
||||||
]
|
]
|
||||||
|
@ -20,7 +20,7 @@ COLOURS_OLD = {

def forward(apps, schema_editor):
def forward(apps, schema_editor):
Tag = apps.get_model('documents', 'Tag')
Tag = apps.get_model("documents", "Tag")

for tag in Tag.objects.all():
for tag in Tag.objects.all():
colour_old_id = tag.colour_old
colour_old_id = tag.colour_old
@ -30,7 +30,7 @@ def forward(apps, schema_editor):
|
|||||||
|
|
||||||
|
|
||||||
def reverse(apps, schema_editor):
|
def reverse(apps, schema_editor):
|
||||||
Tag = apps.get_model('documents', 'Tag')
|
Tag = apps.get_model("documents", "Tag")
|
||||||
|
|
||||||
def _get_colour_id(rdb):
|
def _get_colour_id(rdb):
|
||||||
for idx, rdbx in COLOURS_OLD.items():
|
for idx, rdbx in COLOURS_OLD.items():
|
||||||
@ -48,23 +48,25 @@ def reverse(apps, schema_editor):
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1012_fix_archive_files'),
|
("documents", "1012_fix_archive_files"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.RenameField(
|
migrations.RenameField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
old_name='colour',
|
old_name="colour",
|
||||||
new_name='colour_old',
|
new_name="colour_old",
|
||||||
),
|
),
|
||||||
migrations.AddField(
|
migrations.AddField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='color',
|
name="color",
|
||||||
field=models.CharField(default='#a6cee3', max_length=7, verbose_name='color'),
|
field=models.CharField(
|
||||||
|
default="#a6cee3", max_length=7, verbose_name="color"
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.RunPython(forward, reverse),
|
migrations.RunPython(forward, reverse),
|
||||||
migrations.RemoveField(
|
migrations.RemoveField(
|
||||||
model_name='tag',
|
model_name="tag",
|
||||||
name='colour_old',
|
name="colour_old",
|
||||||
)
|
),
|
||||||
]
|
]
|
||||||
|
@ -6,13 +6,37 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1013_migrate_tag_colour'),
|
("documents", "1013_migrate_tag_colour"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedviewfilterrule',
|
model_name="savedviewfilterrule",
|
||||||
name='rule_type',
|
name="rule_type",
|
||||||
field=models.PositiveIntegerField(choices=[(0, 'title contains'), (1, 'content contains'), (2, 'ASN is'), (3, 'correspondent is'), (4, 'document type is'), (5, 'is in inbox'), (6, 'has tag'), (7, 'has any tag'), (8, 'created before'), (9, 'created after'), (10, 'created year is'), (11, 'created month is'), (12, 'created day is'), (13, 'added before'), (14, 'added after'), (15, 'modified before'), (16, 'modified after'), (17, 'does not have tag'), (18, 'does not have ASN'), (19, 'title or content contains')], verbose_name='rule type'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(0, "title contains"),
|
||||||
|
(1, "content contains"),
|
||||||
|
(2, "ASN is"),
|
||||||
|
(3, "correspondent is"),
|
||||||
|
(4, "document type is"),
|
||||||
|
(5, "is in inbox"),
|
||||||
|
(6, "has tag"),
|
||||||
|
(7, "has any tag"),
|
||||||
|
(8, "created before"),
|
||||||
|
(9, "created after"),
|
||||||
|
(10, "created year is"),
|
||||||
|
(11, "created month is"),
|
||||||
|
(12, "created day is"),
|
||||||
|
(13, "added before"),
|
||||||
|
(14, "added after"),
|
||||||
|
(15, "modified before"),
|
||||||
|
(16, "modified after"),
|
||||||
|
(17, "does not have tag"),
|
||||||
|
(18, "does not have ASN"),
|
||||||
|
(19, "title or content contains"),
|
||||||
|
],
|
||||||
|
verbose_name="rule type",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -8,20 +8,20 @@ logger = logging.getLogger("paperless.migrations")

def remove_null_characters(apps, schema_editor):
def remove_null_characters(apps, schema_editor):
Document = apps.get_model('documents', 'Document')
Document = apps.get_model("documents", "Document")

for doc in Document.objects.all():
for doc in Document.objects.all():
content: str = doc.content
content: str = doc.content
if '\0' in content:
if "\0" in content:
logger.info(f"Removing null characters from document {doc}...")
logger.info(f"Removing null characters from document {doc}...")
doc.content = content.replace('\0', ' ')
doc.content = content.replace("\0", " ")
doc.save()
doc.save()

class Migration(migrations.Migration):
class Migration(migrations.Migration):

dependencies = [
dependencies = [
('documents', '1014_auto_20210228_1614'),
("documents", "1014_auto_20210228_1614"),
]
]

operations = [
operations = [
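The operations list is truncated by the hunk context here; for reference, a data migration like remove_null_characters is normally registered with migrations.RunPython. A hedged sketch (the reverse callable is an assumption, not taken from this commit):

    operations = [
        migrations.RunPython(remove_null_characters, migrations.RunPython.noop),
    ]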
|
@ -6,18 +6,46 @@ from django.db import migrations, models
|
|||||||
class Migration(migrations.Migration):
|
class Migration(migrations.Migration):
|
||||||
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
('documents', '1015_remove_null_characters'),
|
("documents", "1015_remove_null_characters"),
|
||||||
]
|
]
|
||||||
|
|
||||||
operations = [
|
operations = [
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedview',
|
model_name="savedview",
|
||||||
name='sort_field',
|
name="sort_field",
|
||||||
field=models.CharField(blank=True, max_length=128, null=True, verbose_name='sort field'),
|
field=models.CharField(
|
||||||
|
blank=True, max_length=128, null=True, verbose_name="sort field"
|
||||||
|
),
|
||||||
),
|
),
|
||||||
migrations.AlterField(
|
migrations.AlterField(
|
||||||
model_name='savedviewfilterrule',
|
model_name="savedviewfilterrule",
|
||||||
name='rule_type',
|
name="rule_type",
|
||||||
field=models.PositiveIntegerField(choices=[(0, 'title contains'), (1, 'content contains'), (2, 'ASN is'), (3, 'correspondent is'), (4, 'document type is'), (5, 'is in inbox'), (6, 'has tag'), (7, 'has any tag'), (8, 'created before'), (9, 'created after'), (10, 'created year is'), (11, 'created month is'), (12, 'created day is'), (13, 'added before'), (14, 'added after'), (15, 'modified before'), (16, 'modified after'), (17, 'does not have tag'), (18, 'does not have ASN'), (19, 'title or content contains'), (20, 'fulltext query'), (21, 'more like this')], verbose_name='rule type'),
|
field=models.PositiveIntegerField(
|
||||||
|
choices=[
|
||||||
|
(0, "title contains"),
|
||||||
|
(1, "content contains"),
|
||||||
|
(2, "ASN is"),
|
||||||
|
(3, "correspondent is"),
|
||||||
|
(4, "document type is"),
|
||||||
|
(5, "is in inbox"),
|
||||||
|
(6, "has tag"),
|
||||||
|
(7, "has any tag"),
|
||||||
|
(8, "created before"),
|
||||||
|
(9, "created after"),
|
||||||
|
(10, "created year is"),
|
||||||
|
(11, "created month is"),
|
||||||
|
(12, "created day is"),
|
||||||
|
(13, "added before"),
|
||||||
|
(14, "added after"),
|
||||||
|
(15, "modified before"),
|
||||||
|
(16, "modified after"),
|
||||||
|
(17, "does not have tag"),
|
||||||
|
(18, "does not have ASN"),
|
||||||
|
(19, "title or content contains"),
|
||||||
|
(20, "fulltext query"),
|
||||||
|
(21, "more like this"),
|
||||||
|
],
|
||||||
|
verbose_name="rule type",
|
||||||
|
),
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
@ -37,23 +37,15 @@ class MatchingModel(models.Model):
(MATCH_AUTO, _("Automatic")),
(MATCH_AUTO, _("Automatic")),
)
)

name = models.CharField(
name = models.CharField(_("name"), max_length=128, unique=True)
_("name"),
max_length=128, unique=True)

match = models.CharField(
match = models.CharField(_("match"), max_length=256, blank=True)
_("match"),
max_length=256, blank=True)

matching_algorithm = models.PositiveIntegerField(
matching_algorithm = models.PositiveIntegerField(
_("matching algorithm"),
_("matching algorithm"), choices=MATCHING_ALGORITHMS, default=MATCH_ANY
choices=MATCHING_ALGORITHMS,
default=MATCH_ANY
)
)

is_insensitive = models.BooleanField(
is_insensitive = models.BooleanField(_("is insensitive"), default=True)
_("is insensitive"),
default=True)

class Meta:
class Meta:
abstract = True
abstract = True
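The pattern in this hunk is black joining any field definition that fits its default 88-character line length onto a single line. A quick sketch of reproducing that with black's own API (the input string is an assumption, not from the commit):

    import black

    src = 'name = models.CharField(\n    _("name"),\n    max_length=128, unique=True)\n'
    print(black.format_str(src, mode=black.Mode()))
    # name = models.CharField(_("name"), max_length=128, unique=True)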
@ -64,7 +56,6 @@ class MatchingModel(models.Model):
|
|||||||
|
|
||||||
|
|
||||||
class Correspondent(MatchingModel):
|
class Correspondent(MatchingModel):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
ordering = ("name",)
|
ordering = ("name",)
|
||||||
verbose_name = _("correspondent")
|
verbose_name = _("correspondent")
|
||||||
@ -73,17 +64,15 @@ class Correspondent(MatchingModel):
|
|||||||
|
|
||||||
class Tag(MatchingModel):
|
class Tag(MatchingModel):
|
||||||
|
|
||||||
color = models.CharField(
|
color = models.CharField(_("color"), max_length=7, default="#a6cee3")
|
||||||
_("color"),
|
|
||||||
max_length=7,
|
|
||||||
default="#a6cee3"
|
|
||||||
)
|
|
||||||
|
|
||||||
is_inbox_tag = models.BooleanField(
|
is_inbox_tag = models.BooleanField(
|
||||||
_("is inbox tag"),
|
_("is inbox tag"),
|
||||||
default=False,
|
default=False,
|
||||||
help_text=_("Marks this tag as an inbox tag: All newly consumed "
|
help_text=_(
|
||||||
"documents will be tagged with inbox tags.")
|
"Marks this tag as an inbox tag: All newly consumed "
|
||||||
|
"documents will be tagged with inbox tags."
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
@ -92,7 +81,6 @@ class Tag(MatchingModel):
|
|||||||
|
|
||||||
|
|
||||||
class DocumentType(MatchingModel):
|
class DocumentType(MatchingModel):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
verbose_name = _("document type")
|
verbose_name = _("document type")
|
||||||
verbose_name_plural = _("document types")
|
verbose_name_plural = _("document types")
|
||||||
@ -104,7 +92,7 @@ class Document(models.Model):
|
|||||||
STORAGE_TYPE_GPG = "gpg"
|
STORAGE_TYPE_GPG = "gpg"
|
||||||
STORAGE_TYPES = (
|
STORAGE_TYPES = (
|
||||||
(STORAGE_TYPE_UNENCRYPTED, _("Unencrypted")),
|
(STORAGE_TYPE_UNENCRYPTED, _("Unencrypted")),
|
||||||
(STORAGE_TYPE_GPG, _("Encrypted with GNU Privacy Guard"))
|
(STORAGE_TYPE_GPG, _("Encrypted with GNU Privacy Guard")),
|
||||||
)
|
)
|
||||||
|
|
||||||
correspondent = models.ForeignKey(
|
correspondent = models.ForeignKey(
|
||||||
@ -113,12 +101,10 @@ class Document(models.Model):
|
|||||||
null=True,
|
null=True,
|
||||||
related_name="documents",
|
related_name="documents",
|
||||||
on_delete=models.SET_NULL,
|
on_delete=models.SET_NULL,
|
||||||
verbose_name=_("correspondent")
|
verbose_name=_("correspondent"),
|
||||||
)
|
)
|
||||||
|
|
||||||
title = models.CharField(
|
title = models.CharField(_("title"), max_length=128, blank=True, db_index=True)
|
||||||
_("title"),
|
|
||||||
max_length=128, blank=True, db_index=True)
|
|
||||||
|
|
||||||
document_type = models.ForeignKey(
|
document_type = models.ForeignKey(
|
||||||
DocumentType,
|
DocumentType,
|
||||||
@ -126,25 +112,22 @@ class Document(models.Model):
|
|||||||
null=True,
|
null=True,
|
||||||
related_name="documents",
|
related_name="documents",
|
||||||
on_delete=models.SET_NULL,
|
on_delete=models.SET_NULL,
|
||||||
verbose_name=_("document type")
|
verbose_name=_("document type"),
|
||||||
)
|
)
|
||||||
|
|
||||||
content = models.TextField(
|
content = models.TextField(
|
||||||
_("content"),
|
_("content"),
|
||||||
blank=True,
|
blank=True,
|
||||||
help_text=_("The raw, text-only data of the document. This field is "
|
help_text=_(
|
||||||
"primarily used for searching.")
|
"The raw, text-only data of the document. This field is "
|
||||||
|
"primarily used for searching."
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
mime_type = models.CharField(
|
mime_type = models.CharField(_("mime type"), max_length=256, editable=False)
|
||||||
_("mime type"),
|
|
||||||
max_length=256,
|
|
||||||
editable=False
|
|
||||||
)
|
|
||||||
|
|
||||||
tags = models.ManyToManyField(
|
tags = models.ManyToManyField(
|
||||||
Tag, related_name="documents", blank=True,
|
Tag, related_name="documents", blank=True, verbose_name=_("tags")
|
||||||
verbose_name=_("tags")
|
|
||||||
)
|
)
|
||||||
|
|
||||||
checksum = models.CharField(
|
checksum = models.CharField(
|
||||||
@ -152,7 +135,7 @@ class Document(models.Model):
|
|||||||
max_length=32,
|
max_length=32,
|
||||||
editable=False,
|
editable=False,
|
||||||
unique=True,
|
unique=True,
|
||||||
help_text=_("The checksum of the original document.")
|
help_text=_("The checksum of the original document."),
|
||||||
)
|
)
|
||||||
|
|
||||||
archive_checksum = models.CharField(
|
archive_checksum = models.CharField(
|
||||||
@ -161,28 +144,26 @@ class Document(models.Model):
|
|||||||
editable=False,
|
editable=False,
|
||||||
blank=True,
|
blank=True,
|
||||||
null=True,
|
null=True,
|
||||||
help_text=_("The checksum of the archived document.")
|
help_text=_("The checksum of the archived document."),
|
||||||
)
|
)
|
||||||
|
|
||||||
created = models.DateTimeField(
|
created = models.DateTimeField(_("created"), default=timezone.now, db_index=True)
|
||||||
_("created"),
|
|
||||||
default=timezone.now, db_index=True)
|
|
||||||
|
|
||||||
modified = models.DateTimeField(
|
modified = models.DateTimeField(
|
||||||
_("modified"),
|
_("modified"), auto_now=True, editable=False, db_index=True
|
||||||
auto_now=True, editable=False, db_index=True)
|
)
|
||||||
|
|
||||||
storage_type = models.CharField(
|
storage_type = models.CharField(
|
||||||
_("storage type"),
|
_("storage type"),
|
||||||
max_length=11,
|
max_length=11,
|
||||||
choices=STORAGE_TYPES,
|
choices=STORAGE_TYPES,
|
||||||
default=STORAGE_TYPE_UNENCRYPTED,
|
default=STORAGE_TYPE_UNENCRYPTED,
|
||||||
editable=False
|
editable=False,
|
||||||
)
|
)
|
||||||
|
|
||||||
added = models.DateTimeField(
|
added = models.DateTimeField(
|
||||||
_("added"),
|
_("added"), default=timezone.now, editable=False, db_index=True
|
||||||
default=timezone.now, editable=False, db_index=True)
|
)
|
||||||
|
|
||||||
filename = models.FilePathField(
|
filename = models.FilePathField(
|
||||||
_("filename"),
|
_("filename"),
|
||||||
@ -191,7 +172,7 @@ class Document(models.Model):
|
|||||||
default=None,
|
default=None,
|
||||||
unique=True,
|
unique=True,
|
||||||
null=True,
|
null=True,
|
||||||
help_text=_("Current filename in storage")
|
help_text=_("Current filename in storage"),
|
||||||
)
|
)
|
||||||
|
|
||||||
archive_filename = models.FilePathField(
|
archive_filename = models.FilePathField(
|
||||||
@ -201,7 +182,7 @@ class Document(models.Model):
|
|||||||
default=None,
|
default=None,
|
||||||
unique=True,
|
unique=True,
|
||||||
null=True,
|
null=True,
|
||||||
help_text=_("Current archive filename in storage")
|
help_text=_("Current archive filename in storage"),
|
||||||
)
|
)
|
||||||
|
|
||||||
archive_serial_number = models.IntegerField(
|
archive_serial_number = models.IntegerField(
|
||||||
@ -210,8 +191,9 @@ class Document(models.Model):
|
|||||||
null=True,
|
null=True,
|
||||||
unique=True,
|
unique=True,
|
||||||
db_index=True,
|
db_index=True,
|
||||||
help_text=_("The position of this document in your physical document "
|
help_text=_(
|
||||||
"archive.")
|
"The position of this document in your physical document " "archive."
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
@ -238,10 +220,7 @@ class Document(models.Model):
|
|||||||
if self.storage_type == self.STORAGE_TYPE_GPG:
|
if self.storage_type == self.STORAGE_TYPE_GPG:
|
||||||
fname += ".gpg" # pragma: no cover
|
fname += ".gpg" # pragma: no cover
|
||||||
|
|
||||||
return os.path.join(
|
return os.path.join(settings.ORIGINALS_DIR, fname)
|
||||||
settings.ORIGINALS_DIR,
|
|
||||||
fname
|
|
||||||
)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def source_file(self):
|
def source_file(self):
|
||||||
@ -254,10 +233,7 @@ class Document(models.Model):
|
|||||||
@property
|
@property
|
||||||
def archive_path(self):
|
def archive_path(self):
|
||||||
if self.has_archive_version:
|
if self.has_archive_version:
|
||||||
return os.path.join(
|
return os.path.join(settings.ARCHIVE_DIR, str(self.archive_filename))
|
||||||
settings.ARCHIVE_DIR,
|
|
||||||
str(self.archive_filename)
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -291,10 +267,7 @@ class Document(models.Model):
|
|||||||
if self.storage_type == self.STORAGE_TYPE_GPG:
|
if self.storage_type == self.STORAGE_TYPE_GPG:
|
||||||
file_name += ".gpg"
|
file_name += ".gpg"
|
||||||
|
|
||||||
return os.path.join(
|
return os.path.join(settings.THUMBNAIL_DIR, file_name)
|
||||||
settings.THUMBNAIL_DIR,
|
|
||||||
file_name
|
|
||||||
)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def thumbnail_file(self):
|
def thumbnail_file(self):
|
||||||
@ -311,15 +284,13 @@ class Log(models.Model):
|
|||||||
(logging.CRITICAL, _("critical")),
|
(logging.CRITICAL, _("critical")),
|
||||||
)
|
)
|
||||||
|
|
||||||
group = models.UUIDField(
|
group = models.UUIDField(_("group"), blank=True, null=True)
|
||||||
_("group"),
|
|
||||||
blank=True, null=True)
|
|
||||||
|
|
||||||
message = models.TextField(_("message"))
|
message = models.TextField(_("message"))
|
||||||
|
|
||||||
level = models.PositiveIntegerField(
|
level = models.PositiveIntegerField(
|
||||||
_("level"),
|
_("level"), choices=LEVELS, default=logging.INFO
|
||||||
choices=LEVELS, default=logging.INFO)
|
)
|
||||||
|
|
||||||
created = models.DateTimeField(_("created"), auto_now_add=True)
|
created = models.DateTimeField(_("created"), auto_now_add=True)
|
||||||
|
|
||||||
@ -333,18 +304,14 @@ class Log(models.Model):
|
|||||||
|
|
||||||
|
|
||||||
class SavedView(models.Model):
|
class SavedView(models.Model):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
|
|
||||||
ordering = ("name",)
|
ordering = ("name",)
|
||||||
verbose_name = _("saved view")
|
verbose_name = _("saved view")
|
||||||
verbose_name_plural = _("saved views")
|
verbose_name_plural = _("saved views")
|
||||||
|
|
||||||
user = models.ForeignKey(User, on_delete=models.CASCADE,
|
user = models.ForeignKey(User, on_delete=models.CASCADE, verbose_name=_("user"))
|
||||||
verbose_name=_("user"))
|
name = models.CharField(_("name"), max_length=128)
|
||||||
name = models.CharField(
|
|
||||||
_("name"),
|
|
||||||
max_length=128)
|
|
||||||
|
|
||||||
show_on_dashboard = models.BooleanField(
|
show_on_dashboard = models.BooleanField(
|
||||||
_("show on dashboard"),
|
_("show on dashboard"),
|
||||||
@ -354,14 +321,9 @@ class SavedView(models.Model):
|
|||||||
)
|
)
|
||||||
|
|
||||||
sort_field = models.CharField(
|
sort_field = models.CharField(
|
||||||
_("sort field"),
|
_("sort field"), max_length=128, null=True, blank=True
|
||||||
max_length=128,
|
|
||||||
null=True,
|
|
||||||
blank=True
|
|
||||||
)
|
)
|
||||||
sort_reverse = models.BooleanField(
|
sort_reverse = models.BooleanField(_("sort reverse"), default=False)
|
||||||
_("sort reverse"),
|
|
||||||
default=False)
|
|
||||||
|
|
||||||
|
|
||||||
class SavedViewFilterRule(models.Model):
|
class SavedViewFilterRule(models.Model):
|
||||||
@ -388,25 +350,19 @@ class SavedViewFilterRule(models.Model):
|
|||||||
(19, _("title or content contains")),
|
(19, _("title or content contains")),
|
||||||
(20, _("fulltext query")),
|
(20, _("fulltext query")),
|
||||||
(21, _("more like this")),
|
(21, _("more like this")),
|
||||||
(22, _("has tags in"))
|
(22, _("has tags in")),
|
||||||
]
|
]
|
||||||
|
|
||||||
saved_view = models.ForeignKey(
|
saved_view = models.ForeignKey(
|
||||||
SavedView,
|
SavedView,
|
||||||
on_delete=models.CASCADE,
|
on_delete=models.CASCADE,
|
||||||
related_name="filter_rules",
|
related_name="filter_rules",
|
||||||
verbose_name=_("saved view")
|
verbose_name=_("saved view"),
|
||||||
)
|
)
|
||||||
|
|
||||||
rule_type = models.PositiveIntegerField(
|
rule_type = models.PositiveIntegerField(_("rule type"), choices=RULE_TYPES)
|
||||||
_("rule type"),
|
|
||||||
choices=RULE_TYPES)
|
|
||||||
|
|
||||||
value = models.CharField(
|
value = models.CharField(_("value"), max_length=128, blank=True, null=True)
|
||||||
_("value"),
|
|
||||||
max_length=128,
|
|
||||||
blank=True,
|
|
||||||
null=True)
|
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
verbose_name = _("filter rule")
|
verbose_name = _("filter rule")
|
||||||
@ -416,20 +372,23 @@ class SavedViewFilterRule(models.Model):
|
|||||||
# TODO: why is this in the models file?
|
# TODO: why is this in the models file?
|
||||||
class FileInfo:
|
class FileInfo:
|
||||||
|
|
||||||
REGEXES = OrderedDict([
|
REGEXES = OrderedDict(
|
||||||
("created-title", re.compile(
|
[
|
||||||
|
(
|
||||||
|
"created-title",
|
||||||
|
re.compile(
|
||||||
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
|
||||||
r"(?P<title>.*)$",
|
r"(?P<title>.*)$",
|
||||||
flags=re.IGNORECASE
|
flags=re.IGNORECASE,
|
||||||
)),
|
),
|
||||||
("title", re.compile(
|
),
|
||||||
r"(?P<title>.*)$",
|
("title", re.compile(r"(?P<title>.*)$", flags=re.IGNORECASE)),
|
||||||
flags=re.IGNORECASE
|
]
|
||||||
))
|
)
|
||||||
])
|
|
||||||
|
|
||||||
def __init__(self, created=None, correspondent=None, title=None, tags=(),
|
def __init__(
|
||||||
extension=None):
|
self, created=None, correspondent=None, title=None, tags=(), extension=None
|
||||||
|
):
|
||||||
|
|
||||||
self.created = created
|
self.created = created
|
||||||
self.title = title
|
self.title = title
|
||||||
@ -451,9 +410,7 @@ class FileInfo:
|
|||||||
@classmethod
|
@classmethod
|
||||||
def _mangle_property(cls, properties, name):
|
def _mangle_property(cls, properties, name):
|
||||||
if name in properties:
|
if name in properties:
|
||||||
properties[name] = getattr(cls, "_get_{}".format(name))(
|
properties[name] = getattr(cls, "_get_{}".format(name))(properties[name])
|
||||||
properties[name]
|
|
||||||
)
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_filename(cls, filename):
|
def from_filename(cls, filename):
|
||||||
|
@ -27,11 +27,11 @@ from documents.signals import document_consumer_declaration
# TODO: isnt there a date parsing library for this?
# TODO: isnt there a date parsing library for this?

DATE_REGEX = re.compile(
DATE_REGEX = re.compile(
r'(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|' # NOQA: E501
r"(\b|(?!=([_-])))([0-9]{1,2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{4}|[0-9]{2})(\b|(?=([_-])))|" # NOQA: E501
r'(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|' # NOQA: E501
r"(\b|(?!=([_-])))([0-9]{4}|[0-9]{2})[\.\/-]([0-9]{1,2})[\.\/-]([0-9]{1,2})(\b|(?=([_-])))|" # NOQA: E501
r'(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|' # NOQA: E501
r"(\b|(?!=([_-])))([0-9]{1,2}[\. ]+[^ ]{3,9} ([0-9]{4}|[0-9]{2}))(\b|(?=([_-])))|" # NOQA: E501
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|'
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{1,2}, ([0-9]{4}))(\b|(?=([_-])))|"
r'(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))'
r"(\b|(?!=([_-])))([^\W\d_]{3,9} [0-9]{4})(\b|(?=([_-])))"
)
)
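A quick illustrative check of what this pattern matches (the sample filenames are assumptions, not from the commit):

    for sample in ("invoice 02.03.2021.pdf", "statement 2021-03-02.pdf"):
        print(bool(DATE_REGEX.search(sample)))  # expected: True for both samples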
@ -93,8 +93,7 @@ def get_parser_class_for_mime_type(mime_type):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
# Return the parser with the highest weight.
|
# Return the parser with the highest weight.
|
||||||
return sorted(
|
return sorted(options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
|
||||||
options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
|
|
||||||
|
|
||||||
|
|
||||||
def get_parser_class(path):
|
def get_parser_class(path):
|
||||||
@ -107,7 +106,8 @@ def get_parser_class(path):
|
|||||||
return get_parser_class_for_mime_type(mime_type)
|
return get_parser_class_for_mime_type(mime_type)
|
||||||
|
|
||||||
|
|
||||||
def run_convert(input_file,
|
def run_convert(
|
||||||
|
input_file,
|
||||||
output_file,
|
output_file,
|
||||||
density=None,
|
density=None,
|
||||||
scale=None,
|
scale=None,
|
||||||
@ -118,7 +118,8 @@ def run_convert(input_file,
|
|||||||
depth=None,
|
depth=None,
|
||||||
auto_orient=False,
|
auto_orient=False,
|
||||||
extra=None,
|
extra=None,
|
||||||
logging_group=None):
|
logging_group=None,
|
||||||
|
):
|
||||||
|
|
||||||
environment = os.environ.copy()
|
environment = os.environ.copy()
|
||||||
if settings.CONVERT_MEMORY_LIMIT:
|
if settings.CONVERT_MEMORY_LIMIT:
|
||||||
@ -127,17 +128,17 @@ def run_convert(input_file,
|
|||||||
environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR
|
environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR
|
||||||
|
|
||||||
args = [settings.CONVERT_BINARY]
|
args = [settings.CONVERT_BINARY]
|
||||||
args += ['-density', str(density)] if density else []
|
args += ["-density", str(density)] if density else []
|
||||||
args += ['-scale', str(scale)] if scale else []
|
args += ["-scale", str(scale)] if scale else []
|
||||||
args += ['-alpha', str(alpha)] if alpha else []
|
args += ["-alpha", str(alpha)] if alpha else []
|
||||||
args += ['-strip'] if strip else []
|
args += ["-strip"] if strip else []
|
||||||
args += ['-trim'] if trim else []
|
args += ["-trim"] if trim else []
|
||||||
args += ['-type', str(type)] if type else []
|
args += ["-type", str(type)] if type else []
|
||||||
args += ['-depth', str(depth)] if depth else []
|
args += ["-depth", str(depth)] if depth else []
|
||||||
args += ['-auto-orient'] if auto_orient else []
|
args += ["-auto-orient"] if auto_orient else []
|
||||||
args += [input_file, output_file]
|
args += [input_file, output_file]
|
||||||
|
|
||||||
logger.debug("Execute: " + " ".join(args), extra={'group': logging_group})
|
logger.debug("Execute: " + " ".join(args), extra={"group": logging_group})
|
||||||
|
|
||||||
if not subprocess.Popen(args, env=environment).wait() == 0:
|
if not subprocess.Popen(args, env=environment).wait() == 0:
|
||||||
raise ParseError("Convert failed at {}".format(args))
|
raise ParseError("Convert failed at {}".format(args))
|
||||||
@ -155,19 +156,16 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None):
|
|||||||
logger.warning(
|
logger.warning(
|
||||||
"Thumbnail generation with ImageMagick failed, falling back "
|
"Thumbnail generation with ImageMagick failed, falling back "
|
||||||
"to ghostscript. Check your /etc/ImageMagick-x/policy.xml!",
|
"to ghostscript. Check your /etc/ImageMagick-x/policy.xml!",
|
||||||
extra={'group': logging_group}
|
extra={"group": logging_group},
|
||||||
)
|
)
|
||||||
gs_out_path = os.path.join(temp_dir, "gs_out.png")
|
gs_out_path = os.path.join(temp_dir, "gs_out.png")
|
||||||
cmd = [settings.GS_BINARY,
|
cmd = [settings.GS_BINARY, "-q", "-sDEVICE=pngalpha", "-o", gs_out_path, in_path]
|
||||||
"-q",
|
|
||||||
"-sDEVICE=pngalpha",
|
|
||||||
"-o", gs_out_path,
|
|
||||||
in_path]
|
|
||||||
try:
|
try:
|
||||||
if not subprocess.Popen(cmd).wait() == 0:
|
if not subprocess.Popen(cmd).wait() == 0:
|
||||||
raise ParseError("Thumbnail (gs) failed at {}".format(cmd))
|
raise ParseError("Thumbnail (gs) failed at {}".format(cmd))
|
||||||
# then run convert on the output from gs
|
# then run convert on the output from gs
|
||||||
run_convert(density=300,
|
run_convert(
|
||||||
|
density=300,
|
||||||
scale="500x5000>",
|
scale="500x5000>",
|
||||||
alpha="remove",
|
alpha="remove",
|
||||||
strip=True,
|
strip=True,
|
||||||
@ -175,7 +173,8 @@ def make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group=None):
|
|||||||
auto_orient=True,
|
auto_orient=True,
|
||||||
input_file=gs_out_path,
|
input_file=gs_out_path,
|
||||||
output_file=out_path,
|
output_file=out_path,
|
||||||
logging_group=logging_group)
|
logging_group=logging_group,
|
||||||
|
)
|
||||||
|
|
||||||
return out_path
|
return out_path
|
||||||
|
|
||||||
@ -191,7 +190,8 @@ def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None):
|
|||||||
|
|
||||||
# Run convert to get a decent thumbnail
|
# Run convert to get a decent thumbnail
|
||||||
try:
|
try:
|
||||||
run_convert(density=300,
|
run_convert(
|
||||||
|
density=300,
|
||||||
scale="500x5000>",
|
scale="500x5000>",
|
||||||
alpha="remove",
|
alpha="remove",
|
||||||
strip=True,
|
strip=True,
|
||||||
@ -199,10 +199,10 @@ def make_thumbnail_from_pdf(in_path, temp_dir, logging_group=None):
|
|||||||
auto_orient=True,
|
auto_orient=True,
|
||||||
input_file="{}[0]".format(in_path),
|
input_file="{}[0]".format(in_path),
|
||||||
output_file=out_path,
|
output_file=out_path,
|
||||||
logging_group=logging_group)
|
logging_group=logging_group,
|
||||||
|
)
|
||||||
except ParseError:
|
except ParseError:
|
||||||
out_path = make_thumbnail_from_pdf_gs_fallback(
|
out_path = make_thumbnail_from_pdf_gs_fallback(in_path, temp_dir, logging_group)
|
||||||
in_path, temp_dir, logging_group)
|
|
||||||
|
|
||||||
return out_path
|
return out_path
|
||||||
|
|
||||||
@ -223,15 +223,17 @@ def parse_date(filename, text):
settings={
settings={
"DATE_ORDER": date_order,
"DATE_ORDER": date_order,
"PREFER_DAY_OF_MONTH": "first",
"PREFER_DAY_OF_MONTH": "first",
"RETURN_AS_TIMEZONE_AWARE":
"RETURN_AS_TIMEZONE_AWARE": True,
True
},
}
)
)

def __filter(date):
def __filter(date):
if date and date.year > 1900 and \
if (
date <= timezone.now() and \
date
date.date() not in settings.IGNORE_DATES:
and date.year > 1900
and date <= timezone.now()
and date.date() not in settings.IGNORE_DATES
):
return date
return date
return None
return None
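The settings keys being reformatted above match the dateparser library's API; a minimal standalone sketch with assumed values (not part of the commit):

    import dateparser

    date = dateparser.parse(
        "02.03.2021",
        settings={
            "DATE_ORDER": "DMY",
            "PREFER_DAY_OF_MONTH": "first",
            "RETURN_AS_TIMEZONE_AWARE": True,
        },
    )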
@ -285,8 +287,7 @@ class DocumentParser(LoggingMixin):
|
|||||||
super().__init__()
|
super().__init__()
|
||||||
self.logging_group = logging_group
|
self.logging_group = logging_group
|
||||||
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
|
||||||
self.tempdir = tempfile.mkdtemp(
|
self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=settings.SCRATCH_DIR)
|
||||||
prefix="paperless-", dir=settings.SCRATCH_DIR)
|
|
||||||
|
|
||||||
self.archive_path = None
|
self.archive_path = None
|
||||||
self.text = None
|
self.text = None
|
||||||
@ -312,18 +313,21 @@ class DocumentParser(LoggingMixin):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
def get_optimised_thumbnail(self,
|
def get_optimised_thumbnail(self, document_path, mime_type, file_name=None):
|
||||||
document_path,
|
|
||||||
mime_type,
|
|
||||||
file_name=None):
|
|
||||||
thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
|
thumbnail = self.get_thumbnail(document_path, mime_type, file_name)
|
||||||
if settings.OPTIMIZE_THUMBNAILS:
|
if settings.OPTIMIZE_THUMBNAILS:
|
||||||
out_path = os.path.join(self.tempdir, "thumb_optipng.png")
|
out_path = os.path.join(self.tempdir, "thumb_optipng.png")
|
||||||
|
|
||||||
args = (settings.OPTIPNG_BINARY,
|
args = (
|
||||||
"-silent", "-o5", thumbnail, "-out", out_path)
|
settings.OPTIPNG_BINARY,
|
||||||
|
"-silent",
|
||||||
|
"-o5",
|
||||||
|
thumbnail,
|
||||||
|
"-out",
|
||||||
|
out_path,
|
||||||
|
)
|
||||||
|
|
||||||
self.log('debug', f"Execute: {' '.join(args)}")
|
self.log("debug", f"Execute: {' '.join(args)}")
|
||||||
|
|
||||||
if not subprocess.Popen(args).wait() == 0:
|
if not subprocess.Popen(args).wait() == 0:
|
||||||
raise ParseError("Optipng failed at {}".format(args))
|
raise ParseError("Optipng failed at {}".format(args))
|
||||||
|
@ -9,7 +9,6 @@ from documents.models import Document
|
|||||||
|
|
||||||
|
|
||||||
class SanityCheckMessages:
|
class SanityCheckMessages:
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._messages = []
|
self._messages = []
|
||||||
|
|
||||||
@ -29,7 +28,7 @@ class SanityCheckMessages:
|
|||||||
logger.info("Sanity checker detected no issues.")
|
logger.info("Sanity checker detected no issues.")
|
||||||
else:
|
else:
|
||||||
for msg in self._messages:
|
for msg in self._messages:
|
||||||
logger.log(msg['level'], msg['message'])
|
logger.log(msg["level"], msg["message"])
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
return len(self._messages)
|
return len(self._messages)
|
||||||
@ -38,10 +37,10 @@ class SanityCheckMessages:
|
|||||||
return self._messages[item]
|
return self._messages[item]
|
||||||
|
|
||||||
def has_error(self):
|
def has_error(self):
|
||||||
return any([msg['level'] == logging.ERROR for msg in self._messages])
|
return any([msg["level"] == logging.ERROR for msg in self._messages])
|
||||||
|
|
||||||
def has_warning(self):
|
def has_warning(self):
|
||||||
return any([msg['level'] == logging.WARNING for msg in self._messages])
|
return any([msg["level"] == logging.WARNING for msg in self._messages])
|
||||||
|
|
||||||
|
|
||||||
class SanityCheckFailedException(Exception):
|
class SanityCheckFailedException(Exception):
|
||||||
@ -71,9 +70,7 @@ def check_sanity(progress=False):
|
|||||||
with doc.thumbnail_file as f:
|
with doc.thumbnail_file as f:
|
||||||
f.read()
|
f.read()
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
messages.error(
|
messages.error(f"Cannot read thumbnail file of document {doc.pk}: {e}")
|
||||||
f"Cannot read thumbnail file of document {doc.pk}: {e}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Check sanity of the original file
|
# Check sanity of the original file
|
||||||
# TODO: extract method
|
# TODO: extract method
|
||||||
@ -86,8 +83,7 @@ def check_sanity(progress=False):
|
|||||||
with doc.source_file as f:
|
with doc.source_file as f:
|
||||||
checksum = hashlib.md5(f.read()).hexdigest()
|
checksum = hashlib.md5(f.read()).hexdigest()
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
messages.error(
|
messages.error(f"Cannot read original file of document {doc.pk}: {e}")
|
||||||
f"Cannot read original file of document {doc.pk}: {e}")
|
|
||||||
else:
|
else:
|
||||||
if not checksum == doc.checksum:
|
if not checksum == doc.checksum:
|
||||||
messages.error(
|
messages.error(
|
||||||
@ -108,9 +104,7 @@ def check_sanity(progress=False):
|
|||||||
)
|
)
|
||||||
elif doc.has_archive_version:
|
elif doc.has_archive_version:
|
||||||
if not os.path.isfile(doc.archive_path):
|
if not os.path.isfile(doc.archive_path):
|
||||||
messages.error(
|
messages.error(f"Archived version of document {doc.pk} does not exist.")
|
||||||
f"Archived version of document {doc.pk} does not exist."
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
if os.path.normpath(doc.archive_path) in present_files:
|
if os.path.normpath(doc.archive_path) in present_files:
|
||||||
present_files.remove(os.path.normpath(doc.archive_path))
|
present_files.remove(os.path.normpath(doc.archive_path))
|
||||||
|
@ -7,8 +7,15 @@ from rest_framework import serializers
|
|||||||
from rest_framework.fields import SerializerMethodField
|
from rest_framework.fields import SerializerMethodField
|
||||||
|
|
||||||
from . import bulk_edit
|
from . import bulk_edit
|
||||||
from .models import Correspondent, Tag, Document, DocumentType, \
|
from .models import (
|
||||||
SavedView, SavedViewFilterRule, MatchingModel
|
Correspondent,
|
||||||
|
Tag,
|
||||||
|
Document,
|
||||||
|
DocumentType,
|
||||||
|
SavedView,
|
||||||
|
SavedViewFilterRule,
|
||||||
|
MatchingModel,
|
||||||
|
)
|
||||||
from .parsers import is_mime_type_supported
|
from .parsers import is_mime_type_supported
|
||||||
|
|
||||||
from django.utils.translation import gettext as _
|
from django.utils.translation import gettext as _
|
||||||
@ -23,7 +30,7 @@ class DynamicFieldsModelSerializer(serializers.ModelSerializer):
|
|||||||
|
|
||||||
def __init__(self, *args, **kwargs):
|
def __init__(self, *args, **kwargs):
|
||||||
# Don't pass the 'fields' arg up to the superclass
|
# Don't pass the 'fields' arg up to the superclass
|
||||||
fields = kwargs.pop('fields', None)
|
fields = kwargs.pop("fields", None)
|
||||||
|
|
||||||
# Instantiate the superclass normally
|
# Instantiate the superclass normally
|
||||||
super(DynamicFieldsModelSerializer, self).__init__(*args, **kwargs)
|
super(DynamicFieldsModelSerializer, self).__init__(*args, **kwargs)
|
||||||
@ -42,16 +49,19 @@ class MatchingModelSerializer(serializers.ModelSerializer):
|
|||||||
|
|
||||||
def get_slug(self, obj):
|
def get_slug(self, obj):
|
||||||
return slugify(obj.name)
|
return slugify(obj.name)
|
||||||
|
|
||||||
slug = SerializerMethodField()
|
slug = SerializerMethodField()
|
||||||
|
|
||||||
def validate_match(self, match):
|
def validate_match(self, match):
|
||||||
if 'matching_algorithm' in self.initial_data and self.initial_data['matching_algorithm'] == MatchingModel.MATCH_REGEX: # NOQA: E501
|
if (
|
||||||
|
"matching_algorithm" in self.initial_data
|
||||||
|
and self.initial_data["matching_algorithm"] == MatchingModel.MATCH_REGEX
|
||||||
|
): # NOQA: E501
|
||||||
try:
|
try:
|
||||||
re.compile(match)
|
re.compile(match)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise serializers.ValidationError(
|
raise serializers.ValidationError(
|
||||||
_("Invalid regular expression: %(error)s") %
|
_("Invalid regular expression: %(error)s") % {"error": str(e)}
|
||||||
{'error': str(e)}
|
|
||||||
)
|
)
|
||||||
return match
|
return match
|
||||||
|
|
||||||
@ -70,12 +80,11 @@ class CorrespondentSerializer(MatchingModelSerializer):
|
|||||||
"matching_algorithm",
|
"matching_algorithm",
|
||||||
"is_insensitive",
|
"is_insensitive",
|
||||||
"document_count",
|
"document_count",
|
||||||
"last_correspondence"
|
"last_correspondence",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
class DocumentTypeSerializer(MatchingModelSerializer):
|
class DocumentTypeSerializer(MatchingModelSerializer):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = DocumentType
|
model = DocumentType
|
||||||
fields = (
|
fields = (
|
||||||
@ -85,7 +94,7 @@ class DocumentTypeSerializer(MatchingModelSerializer):
|
|||||||
"match",
|
"match",
|
||||||
"matching_algorithm",
|
"matching_algorithm",
|
||||||
"is_insensitive",
|
"is_insensitive",
|
||||||
"document_count"
|
"document_count",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -104,7 +113,7 @@ class ColorField(serializers.Field):
|
|||||||
(10, "#6a3d9a"),
|
(10, "#6a3d9a"),
|
||||||
(11, "#b15928"),
|
(11, "#b15928"),
|
||||||
(12, "#000000"),
|
(12, "#000000"),
|
||||||
(13, "#cccccc")
|
(13, "#cccccc"),
|
||||||
)
|
)
|
||||||
|
|
||||||
def to_internal_value(self, data):
|
def to_internal_value(self, data):
|
||||||
@ -122,7 +131,7 @@ class ColorField(serializers.Field):
|
|||||||
|
|
||||||
class TagSerializerVersion1(MatchingModelSerializer):
|
class TagSerializerVersion1(MatchingModelSerializer):
|
||||||
|
|
||||||
colour = ColorField(source='color', default="#a6cee3")
|
colour = ColorField(source="color", default="#a6cee3")
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Tag
|
model = Tag
|
||||||
@ -135,20 +144,19 @@ class TagSerializerVersion1(MatchingModelSerializer):
"matching_algorithm",
"matching_algorithm",
"is_insensitive",
"is_insensitive",
"is_inbox_tag",
"is_inbox_tag",
"document_count"
"document_count",
)
)

class TagSerializer(MatchingModelSerializer):
class TagSerializer(MatchingModelSerializer):

def get_text_color(self, obj):
def get_text_color(self, obj):
try:
try:
h = obj.color.lstrip('#')
h = obj.color.lstrip("#")
rgb = tuple(int(h[i:i + 2], 16)/256 for i in (0, 2, 4))
rgb = tuple(int(h[i : i + 2], 16) / 256 for i in (0, 2, 4))
luminance = math.sqrt(
luminance = math.sqrt(
0.299 * math.pow(rgb[0], 2) +
0.299 * math.pow(rgb[0], 2)
0.587 * math.pow(rgb[1], 2) +
+ 0.587 * math.pow(rgb[1], 2)
0.114 * math.pow(rgb[2], 2)
+ 0.114 * math.pow(rgb[2], 2)
)
)
return "#ffffff" if luminance < 0.53 else "#000000"
return "#ffffff" if luminance < 0.53 else "#000000"
except ValueError:
except ValueError:
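To make the perceived-luminance check above concrete, a worked example for the default tag colour (values computed here for illustration, not part of the commit):

    import math

    h = "a6cee3"  # default tag colour, leading "#" already stripped
    rgb = tuple(int(h[i : i + 2], 16) / 256 for i in (0, 2, 4))  # roughly (0.648, 0.805, 0.887)
    luminance = math.sqrt(
        0.299 * math.pow(rgb[0], 2)
        + 0.587 * math.pow(rgb[1], 2)
        + 0.114 * math.pow(rgb[2], 2)
    )
    # luminance is roughly 0.77, so get_text_color() picks "#000000" (dark text) for this tag.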
@ -168,7 +176,7 @@ class TagSerializer(MatchingModelSerializer):
|
|||||||
"matching_algorithm",
|
"matching_algorithm",
|
||||||
"is_insensitive",
|
"is_insensitive",
|
||||||
"is_inbox_tag",
|
"is_inbox_tag",
|
||||||
"document_count"
|
"document_count",
|
||||||
)
|
)
|
||||||
|
|
||||||
def validate_color(self, color):
|
def validate_color(self, color):
|
||||||
@ -231,7 +239,6 @@ class DocumentSerializer(DynamicFieldsModelSerializer):
|
|||||||
|
|
||||||
|
|
||||||
class SavedViewFilterRuleSerializer(serializers.ModelSerializer):
|
class SavedViewFilterRuleSerializer(serializers.ModelSerializer):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = SavedViewFilterRule
|
model = SavedViewFilterRule
|
||||||
fields = ["rule_type", "value"]
|
fields = ["rule_type", "value"]
|
||||||
@ -244,28 +251,33 @@ class SavedViewSerializer(serializers.ModelSerializer):
|
|||||||
class Meta:
|
class Meta:
|
||||||
model = SavedView
|
model = SavedView
|
||||||
depth = 1
|
depth = 1
|
||||||
fields = ["id", "name", "show_on_dashboard", "show_in_sidebar",
|
fields = [
|
||||||
"sort_field", "sort_reverse", "filter_rules"]
|
"id",
|
||||||
|
"name",
|
||||||
|
"show_on_dashboard",
|
||||||
|
"show_in_sidebar",
|
||||||
|
"sort_field",
|
||||||
|
"sort_reverse",
|
||||||
|
"filter_rules",
|
||||||
|
]
|
||||||
|
|
||||||
def update(self, instance, validated_data):
|
def update(self, instance, validated_data):
|
||||||
if 'filter_rules' in validated_data:
|
if "filter_rules" in validated_data:
|
||||||
rules_data = validated_data.pop('filter_rules')
|
rules_data = validated_data.pop("filter_rules")
|
||||||
else:
|
else:
|
||||||
rules_data = None
|
rules_data = None
|
||||||
super(SavedViewSerializer, self).update(instance, validated_data)
|
super(SavedViewSerializer, self).update(instance, validated_data)
|
||||||
if rules_data is not None:
|
if rules_data is not None:
|
||||||
SavedViewFilterRule.objects.filter(saved_view=instance).delete()
|
SavedViewFilterRule.objects.filter(saved_view=instance).delete()
|
||||||
for rule_data in rules_data:
|
for rule_data in rules_data:
|
||||||
SavedViewFilterRule.objects.create(
|
SavedViewFilterRule.objects.create(saved_view=instance, **rule_data)
|
||||||
saved_view=instance, **rule_data)
|
|
||||||
return instance
|
return instance
|
||||||
|
|
||||||
def create(self, validated_data):
|
def create(self, validated_data):
|
||||||
rules_data = validated_data.pop('filter_rules')
|
rules_data = validated_data.pop("filter_rules")
|
||||||
saved_view = SavedView.objects.create(**validated_data)
|
saved_view = SavedView.objects.create(**validated_data)
|
||||||
for rule_data in rules_data:
|
for rule_data in rules_data:
|
||||||
SavedViewFilterRule.objects.create(
|
SavedViewFilterRule.objects.create(saved_view=saved_view, **rule_data)
|
||||||
saved_view=saved_view, **rule_data)
|
|
||||||
return saved_view
|
return saved_view
|
||||||
|
|
||||||
|
|
||||||
@ -275,20 +287,19 @@ class DocumentListSerializer(serializers.Serializer):
|
|||||||
required=True,
|
required=True,
|
||||||
label="Documents",
|
label="Documents",
|
||||||
write_only=True,
|
write_only=True,
|
||||||
child=serializers.IntegerField()
|
child=serializers.IntegerField(),
|
||||||
)
|
)
|
||||||
|
|
||||||
def _validate_document_id_list(self, documents, name="documents"):
|
def _validate_document_id_list(self, documents, name="documents"):
|
||||||
if not type(documents) == list:
|
if not type(documents) == list:
|
||||||
raise serializers.ValidationError(f"{name} must be a list")
|
raise serializers.ValidationError(f"{name} must be a list")
|
||||||
if not all([type(i) == int for i in documents]):
|
if not all([type(i) == int for i in documents]):
|
||||||
raise serializers.ValidationError(
|
raise serializers.ValidationError(f"{name} must be a list of integers")
|
||||||
f"{name} must be a list of integers")
|
|
||||||
count = Document.objects.filter(id__in=documents).count()
|
count = Document.objects.filter(id__in=documents).count()
|
||||||
if not count == len(documents):
|
if not count == len(documents):
|
||||||
raise serializers.ValidationError(
|
raise serializers.ValidationError(
|
||||||
f"Some documents in {name} don't exist or were "
|
f"Some documents in {name} don't exist or were " f"specified twice."
|
||||||
f"specified twice.")
|
)
|
||||||
|
|
||||||
def validate_documents(self, documents):
|
def validate_documents(self, documents):
|
||||||
self._validate_document_id_list(documents)
|
self._validate_document_id_list(documents)
|
||||||
@ -304,7 +315,7 @@ class BulkEditSerializer(DocumentListSerializer):
|
|||||||
"add_tag",
|
"add_tag",
|
||||||
"remove_tag",
|
"remove_tag",
|
||||||
"modify_tags",
|
"modify_tags",
|
||||||
"delete"
|
"delete",
|
||||||
],
|
],
|
||||||
label="Method",
|
label="Method",
|
||||||
write_only=True,
|
write_only=True,
|
||||||
@ -316,12 +327,12 @@ class BulkEditSerializer(DocumentListSerializer):
|
|||||||
if not type(tags) == list:
|
if not type(tags) == list:
|
||||||
raise serializers.ValidationError(f"{name} must be a list")
|
raise serializers.ValidationError(f"{name} must be a list")
|
||||||
if not all([type(i) == int for i in tags]):
|
if not all([type(i) == int for i in tags]):
|
||||||
raise serializers.ValidationError(
|
raise serializers.ValidationError(f"{name} must be a list of integers")
|
||||||
f"{name} must be a list of integers")
|
|
||||||
count = Tag.objects.filter(id__in=tags).count()
|
count = Tag.objects.filter(id__in=tags).count()
|
||||||
if not count == len(tags):
|
if not count == len(tags):
|
||||||
raise serializers.ValidationError(
|
raise serializers.ValidationError(
|
||||||
f"Some tags in {name} don't exist or were specified twice.")
|
f"Some tags in {name} don't exist or were specified twice."
|
||||||
|
)
|
||||||
|
|
||||||
def validate_method(self, method):
|
def validate_method(self, method):
|
||||||
if method == "set_correspondent":
|
if method == "set_correspondent":
|
||||||
@ -340,8 +351,8 @@ class BulkEditSerializer(DocumentListSerializer):
|
|||||||
raise serializers.ValidationError("Unsupported method.")
|
raise serializers.ValidationError("Unsupported method.")
|
||||||
|
|
||||||
def _validate_parameters_tags(self, parameters):
|
def _validate_parameters_tags(self, parameters):
|
||||||
if 'tag' in parameters:
|
if "tag" in parameters:
|
||||||
tag_id = parameters['tag']
|
tag_id = parameters["tag"]
|
||||||
try:
|
try:
|
||||||
Tag.objects.get(id=tag_id)
|
Tag.objects.get(id=tag_id)
|
||||||
except Tag.DoesNotExist:
|
except Tag.DoesNotExist:
|
||||||
@ -350,48 +361,45 @@ class BulkEditSerializer(DocumentListSerializer):
|
|||||||
raise serializers.ValidationError("tag not specified")
|
raise serializers.ValidationError("tag not specified")
|
||||||
|
|
||||||
def _validate_parameters_document_type(self, parameters):
|
def _validate_parameters_document_type(self, parameters):
|
||||||
if 'document_type' in parameters:
|
if "document_type" in parameters:
|
||||||
document_type_id = parameters['document_type']
|
document_type_id = parameters["document_type"]
|
||||||
if document_type_id is None:
|
if document_type_id is None:
|
||||||
# None is ok
|
# None is ok
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
DocumentType.objects.get(id=document_type_id)
|
DocumentType.objects.get(id=document_type_id)
|
||||||
except DocumentType.DoesNotExist:
|
except DocumentType.DoesNotExist:
|
||||||
raise serializers.ValidationError(
|
raise serializers.ValidationError("Document type does not exist")
|
||||||
"Document type does not exist")
|
|
||||||
else:
|
else:
|
||||||
raise serializers.ValidationError("document_type not specified")
|
raise serializers.ValidationError("document_type not specified")
|
||||||
|
|
||||||
def _validate_parameters_correspondent(self, parameters):
|
def _validate_parameters_correspondent(self, parameters):
|
||||||
if 'correspondent' in parameters:
|
if "correspondent" in parameters:
|
||||||
correspondent_id = parameters['correspondent']
|
correspondent_id = parameters["correspondent"]
|
||||||
if correspondent_id is None:
|
if correspondent_id is None:
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
Correspondent.objects.get(id=correspondent_id)
|
Correspondent.objects.get(id=correspondent_id)
|
||||||
except Correspondent.DoesNotExist:
|
except Correspondent.DoesNotExist:
|
||||||
raise serializers.ValidationError(
|
raise serializers.ValidationError("Correspondent does not exist")
|
||||||
"Correspondent does not exist")
|
|
||||||
else:
|
else:
|
||||||
raise serializers.ValidationError("correspondent not specified")
|
raise serializers.ValidationError("correspondent not specified")
|
||||||
|
|
||||||
def _validate_parameters_modify_tags(self, parameters):
|
def _validate_parameters_modify_tags(self, parameters):
|
||||||
if "add_tags" in parameters:
|
if "add_tags" in parameters:
|
||||||
self._validate_tag_id_list(parameters['add_tags'], "add_tags")
|
self._validate_tag_id_list(parameters["add_tags"], "add_tags")
|
||||||
else:
|
else:
|
||||||
raise serializers.ValidationError("add_tags not specified")
|
raise serializers.ValidationError("add_tags not specified")
|
||||||
|
|
||||||
if "remove_tags" in parameters:
|
if "remove_tags" in parameters:
|
||||||
self._validate_tag_id_list(parameters['remove_tags'],
|
self._validate_tag_id_list(parameters["remove_tags"], "remove_tags")
|
||||||
"remove_tags")
|
|
||||||
else:
|
else:
|
||||||
raise serializers.ValidationError("remove_tags not specified")
|
raise serializers.ValidationError("remove_tags not specified")
|
||||||
|
|
||||||
def validate(self, attrs):
|
def validate(self, attrs):
|
||||||
|
|
||||||
method = attrs['method']
|
method = attrs["method"]
|
||||||
parameters = attrs['parameters']
|
parameters = attrs["parameters"]
|
||||||
|
|
||||||
if method == bulk_edit.set_correspondent:
|
if method == bulk_edit.set_correspondent:
|
||||||
self._validate_parameters_correspondent(parameters)
|
self._validate_parameters_correspondent(parameters)
|
||||||
@ -448,8 +456,7 @@ class PostDocumentSerializer(serializers.Serializer):

        if not is_mime_type_supported(mime_type):
            raise serializers.ValidationError(
                _("File type %(type)s not supported") %
                _("File type %(type)s not supported") % {"type": mime_type}
                {'type': mime_type}
            )

        return document.name, document_data
@ -476,13 +483,11 @@ class PostDocumentSerializer(serializers.Serializer):
class BulkDownloadSerializer(DocumentListSerializer):

    content = serializers.ChoiceField(
        choices=["archive", "originals", "both"],
        choices=["archive", "originals", "both"], default="archive"
        default="archive"
    )

    compression = serializers.ChoiceField(
        choices=["none", "deflated", "bzip2", "lzma"],
        choices=["none", "deflated", "bzip2", "lzma"], default="none"
        default="none"
    )

    def validate_compression(self, compression):
@ -492,5 +497,5 @@ class BulkDownloadSerializer(DocumentListSerializer):
            "none": zipfile.ZIP_STORED,
            "deflated": zipfile.ZIP_DEFLATED,
            "bzip2": zipfile.ZIP_BZIP2,
            "lzma": zipfile.ZIP_LZMA
            "lzma": zipfile.ZIP_LZMA,
        }[compression]
@ -13,9 +13,11 @@ from django.utils import termcolors, timezone
from filelock import FileLock

from .. import matching
from ..file_handling import delete_empty_directories, \
from ..file_handling import (
    create_source_path_directory, \
    delete_empty_directories,
    generate_unique_filename
    create_source_path_directory,
    generate_unique_filename,
)
from ..models import Document, Tag, MatchingModel


@ -27,7 +29,8 @@ def add_inbox_tags(sender, document=None, logging_group=None, **kwargs):
        document.tags.add(*inbox_tags)


def set_correspondent(sender,
def set_correspondent(
    sender,
    document=None,
    logging_group=None,
    classifier=None,
@ -36,12 +39,12 @@ def set_correspondent(sender,
    suggest=False,
    base_url=None,
    color=False,
    **kwargs):
    **kwargs,
):
    if document.correspondent and not replace:
        return

    potential_correspondents = matching.match_correspondents(document,
    potential_correspondents = matching.match_correspondents(document, classifier)
                                                             classifier)

    potential_count = len(potential_correspondents)
    if potential_correspondents:
@ -53,13 +56,13 @@ def set_correspondent(sender,
        logger.debug(
            f"Detected {potential_count} potential correspondents, "
            f"so we've opted for {selected}",
            extra={'group': logging_group}
            extra={"group": logging_group},
        )
    else:
        logger.debug(
            f"Detected {potential_count} potential correspondents, "
            f"not assigning any correspondent",
            extra={'group': logging_group}
            extra={"group": logging_group},
        )
        return

@ -67,7 +70,7 @@ def set_correspondent(sender,
    if suggest:
        if base_url:
            print(
                termcolors.colorize(str(document), fg='green')
                termcolors.colorize(str(document), fg="green")
                if color
                else str(document)
            )
@ -75,23 +78,25 @@ def set_correspondent(sender,
        else:
            print(
                (
                    termcolors.colorize(str(document), fg='green')
                    termcolors.colorize(str(document), fg="green")
                    if color
                    else str(document)
                ) + f" [{document.pk}]"
                )
                + f" [{document.pk}]"
            )
        print(f"Suggest correspondent {selected}")
    else:
        logger.info(
            f"Assigning correspondent {selected} to {document}",
            extra={'group': logging_group}
            extra={"group": logging_group},
        )

        document.correspondent = selected
        document.save(update_fields=("correspondent",))

def set_document_type(sender,
def set_document_type(
    sender,
    document=None,
    logging_group=None,
    classifier=None,
@ -100,12 +105,12 @@ def set_document_type(sender,
    suggest=False,
    base_url=None,
    color=False,
    **kwargs):
    **kwargs,
):
    if document.document_type and not replace:
        return

    potential_document_type = matching.match_document_types(document,
    potential_document_type = matching.match_document_types(document, classifier)
                                                            classifier)

    potential_count = len(potential_document_type)
    if potential_document_type:
@ -118,13 +123,13 @@ def set_document_type(sender,
        logger.info(
            f"Detected {potential_count} potential document types, "
            f"so we've opted for {selected}",
            extra={'group': logging_group}
            extra={"group": logging_group},
        )
    else:
        logger.info(
            f"Detected {potential_count} potential document types, "
            f"not assigning any document type",
            extra={'group': logging_group}
            extra={"group": logging_group},
        )
        return

@ -132,7 +137,7 @@ def set_document_type(sender,
    if suggest:
        if base_url:
            print(
                termcolors.colorize(str(document), fg='green')
                termcolors.colorize(str(document), fg="green")
                if color
                else str(document)
            )
@ -140,23 +145,25 @@ def set_document_type(sender,
        else:
            print(
                (
                    termcolors.colorize(str(document), fg='green')
                    termcolors.colorize(str(document), fg="green")
                    if color
                    else str(document)
                ) + f" [{document.pk}]"
                )
                + f" [{document.pk}]"
            )
        print(f"Suggest document type {selected}")
    else:
        logger.info(
            f"Assigning document type {selected} to {document}",
            extra={'group': logging_group}
            extra={"group": logging_group},
        )

        document.document_type = selected
        document.save(update_fields=("document_type",))

def set_tags(sender,
def set_tags(
    sender,
    document=None,
    logging_group=None,
    classifier=None,
@ -164,11 +171,13 @@ def set_tags(sender,
    suggest=False,
    base_url=None,
    color=False,
    **kwargs):
    **kwargs,
):

    if replace:
        Document.tags.through.objects.filter(document=document).exclude(
            Q(tag__is_inbox_tag=True)).exclude(
            Q(tag__is_inbox_tag=True)
        ).exclude(
            Q(tag__match="") & ~Q(tag__matching_algorithm=Tag.MATCH_AUTO)
        ).delete()

@ -181,14 +190,13 @@ def set_tags(sender,
    if suggest:
        extra_tags = current_tags - set(matched_tags)
        extra_tags = [
            t for t in extra_tags
            t for t in extra_tags if t.matching_algorithm == MatchingModel.MATCH_AUTO
            if t.matching_algorithm == MatchingModel.MATCH_AUTO
        ]
        if not relevant_tags and not extra_tags:
            return
        if base_url:
            print(
                termcolors.colorize(str(document), fg='green')
                termcolors.colorize(str(document), fg="green")
                if color
                else str(document)
            )
@ -196,15 +204,14 @@ def set_tags(sender,
        else:
            print(
                (
                    termcolors.colorize(str(document), fg='green')
                    termcolors.colorize(str(document), fg="green")
                    if color
                    else str(document)
                ) + f" [{document.pk}]"
                )
                + f" [{document.pk}]"
            )
        if relevant_tags:
            print(
            print("Suggest tags: " + ", ".join([t.name for t in relevant_tags]))
                "Suggest tags: " + ", ".join([t.name for t in relevant_tags])
            )
        if extra_tags:
            print("Extra tags: " + ", ".join([t.name for t in extra_tags]))
    else:
@ -213,10 +220,8 @@ def set_tags(sender,

        message = 'Tagging "{}" with "{}"'
        logger.info(
            message.format(
            message.format(document, ", ".join([t.name for t in relevant_tags])),
                document, ", ".join([t.name for t in relevant_tags])
            extra={"group": logging_group},
            ),
            extra={'group': logging_group}
        )

        document.tags.add(*relevant_tags)
@ -235,9 +240,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
        while True:
            new_file_path = os.path.join(
                settings.TRASH_DIR,
                old_filebase +
                old_filebase + (f"_{counter:02}" if counter else "") + old_fileext,
                (f"_{counter:02}" if counter else "") +
                old_fileext
            )

            if os.path.exists(new_file_path):
@ -245,8 +248,7 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
            else:
                break

        logger.debug(
        logger.debug(f"Moving {instance.source_path} to trash at {new_file_path}")
            f"Moving {instance.source_path} to trash at {new_file_path}")
        try:
            os.rename(instance.source_path, new_file_path)
        except OSError as e:
@ -256,14 +258,15 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
            )
        return

    for filename in (instance.source_path,
    for filename in (
        instance.source_path,
        instance.archive_path,
        instance.thumbnail_path):
        instance.thumbnail_path,
    ):
        if filename and os.path.isfile(filename):
            try:
                os.unlink(filename)
                logger.debug(
                logger.debug(f"Deleted file {filename}.")
                    f"Deleted file {filename}.")
            except OSError as e:
                logger.warning(
                    f"While deleting document {str(instance)}, the file "
@ -271,14 +274,12 @@ def cleanup_document_deletion(sender, instance, using, **kwargs):
                )

    delete_empty_directories(
        os.path.dirname(instance.source_path),
        os.path.dirname(instance.source_path), root=settings.ORIGINALS_DIR
        root=settings.ORIGINALS_DIR
    )

    if instance.has_archive_version:
        delete_empty_directories(
            os.path.dirname(instance.archive_path),
            os.path.dirname(instance.archive_path), root=settings.ARCHIVE_DIR
            root=settings.ARCHIVE_DIR
        )

@ -289,15 +290,15 @@ class CannotMoveFilesException(Exception):
def validate_move(instance, old_path, new_path):
    if not os.path.isfile(old_path):
        # Can't do anything if the old file does not exist anymore.
        logger.fatal(
        logger.fatal(f"Document {str(instance)}: File {old_path} has gone.")
            f"Document {str(instance)}: File {old_path} has gone.")
        raise CannotMoveFilesException()

    if os.path.isfile(new_path):
        # Can't do anything if the new file already exists. Skip updating file.
        logger.warning(
            f"Document {str(instance)}: Cannot rename file "
            f"since target path {new_path} already exists.")
            f"since target path {new_path} already exists."
        )
        raise CannotMoveFilesException()


@ -333,7 +334,9 @@ def update_filename_and_move_files(sender, instance, **kwargs):
                instance, archive_filename=True
            )

            move_archive = old_archive_filename != instance.archive_filename  # NOQA: E501
            move_archive = (
                old_archive_filename != instance.archive_filename
            )  # NOQA: E501
        else:
            move_archive = False

@ -347,8 +350,7 @@ def update_filename_and_move_files(sender, instance, **kwargs):
            os.rename(old_source_path, instance.source_path)

        if move_archive:
            validate_move(
            validate_move(instance, old_archive_path, instance.archive_path)
                instance, old_archive_path, instance.archive_path)
            create_source_path_directory(instance.archive_path)
            os.rename(old_archive_path, instance.archive_path)

@ -390,12 +392,16 @@ def update_filename_and_move_files(sender, instance, **kwargs):
        # finally, remove any empty sub folders. This will do nothing if
        # something has failed above.
        if not os.path.isfile(old_source_path):
            delete_empty_directories(os.path.dirname(old_source_path),
            delete_empty_directories(
                                     root=settings.ORIGINALS_DIR)
                os.path.dirname(old_source_path), root=settings.ORIGINALS_DIR
            )

        if instance.has_archive_version and not os.path.isfile(old_archive_path):  # NOQA: E501
        if instance.has_archive_version and not os.path.isfile(
            delete_empty_directories(os.path.dirname(old_archive_path),
            old_archive_path
                                     root=settings.ARCHIVE_DIR)
        ):  # NOQA: E501
            delete_empty_directories(
                os.path.dirname(old_archive_path), root=settings.ARCHIVE_DIR
            )


def set_log_entry(sender, document=None, logging_group=None, **kwargs):
@ -31,12 +31,11 @@ def index_reindex(progress_bar_disable=False):


def train_classifier():
    if (not Tag.objects.filter(
    if (
            matching_algorithm=Tag.MATCH_AUTO).exists() and
        not Tag.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
            not DocumentType.objects.filter(
        and not DocumentType.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
            matching_algorithm=Tag.MATCH_AUTO).exists() and
        and not Correspondent.objects.filter(matching_algorithm=Tag.MATCH_AUTO).exists()
            not Correspondent.objects.filter(
    ):
            matching_algorithm=Tag.MATCH_AUTO).exists()):

        return

@ -48,28 +47,25 @@ def train_classifier():
    try:
        if classifier.train():
            logger.info(
                "Saving updated classifier model to {}...".format(
                "Saving updated classifier model to {}...".format(settings.MODEL_FILE)
                    settings.MODEL_FILE)
            )
            classifier.save()
        else:
            logger.debug(
            logger.debug("Training data unchanged.")
                "Training data unchanged."
            )

    except Exception as e:
        logger.warning(
        logger.warning("Classifier error: " + str(e))
            "Classifier error: " + str(e)
        )


def consume_file(path,
def consume_file(
    path,
    override_filename=None,
    override_title=None,
    override_correspondent_id=None,
    override_document_type_id=None,
    override_tag_ids=None,
    task_id=None):
    task_id=None,
):

    document = Consumer().try_consume_file(
        path,
@ -78,16 +74,16 @@ def consume_file(path,
        override_correspondent_id=override_correspondent_id,
        override_document_type_id=override_document_type_id,
        override_tag_ids=override_tag_ids,
        task_id=task_id
        task_id=task_id,
    )

    if document:
        return "Success. New document id {} created".format(
        return "Success. New document id {} created".format(document.pk)
            document.pk
        )
    else:
        raise ConsumerError("Unknown error: Returned document was null, but "
        raise ConsumerError(
                            "no error message was given.")
            "Unknown error: Returned document was null, but "
            "no error message was given."
        )


def sanity_check():
@ -96,8 +92,7 @@ def sanity_check():
    messages.log_messages()

    if messages.has_error():
        raise SanityCheckFailedException(
        raise SanityCheckFailedException("Sanity check failed with errors. See log.")
            "Sanity check failed with errors. See log.")
    elif messages.has_warning():
        return "Sanity check exited with warnings. See log."
    elif len(messages) > 0:
|
|||||||
|
|
||||||
|
|
||||||
class CorrespondentFactory(DjangoModelFactory):
|
class CorrespondentFactory(DjangoModelFactory):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Correspondent
|
model = Correspondent
|
||||||
|
|
||||||
@ -13,6 +12,5 @@ class CorrespondentFactory(DjangoModelFactory):
|
|||||||
|
|
||||||
|
|
||||||
class DocumentFactory(DjangoModelFactory):
|
class DocumentFactory(DjangoModelFactory):
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = Document
|
model = Document
|
||||||
|
@ -11,7 +11,6 @@ from documents.tests.utils import DirectoriesMixin
|
|||||||
|
|
||||||
|
|
||||||
class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
||||||
|
|
||||||
def get_document_from_index(self, doc):
|
def get_document_from_index(self, doc):
|
||||||
ix = index.open_index()
|
ix = index.open_index()
|
||||||
with ix.searcher() as searcher:
|
with ix.searcher() as searcher:
|
||||||
@ -27,7 +26,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
|||||||
doc.title = "new title"
|
doc.title = "new title"
|
||||||
self.doc_admin.save_model(None, doc, None, None)
|
self.doc_admin.save_model(None, doc, None, None)
|
||||||
self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
|
self.assertEqual(Document.objects.get(id=doc.id).title, "new title")
|
||||||
self.assertEqual(self.get_document_from_index(doc)['id'], doc.id)
|
self.assertEqual(self.get_document_from_index(doc)["id"], doc.id)
|
||||||
|
|
||||||
def test_delete_model(self):
|
def test_delete_model(self):
|
||||||
doc = Document.objects.create(title="test")
|
doc = Document.objects.create(title="test")
|
||||||
@ -42,7 +41,9 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
|||||||
def test_delete_queryset(self):
|
def test_delete_queryset(self):
|
||||||
docs = []
|
docs = []
|
||||||
for i in range(42):
|
for i in range(42):
|
||||||
doc = Document.objects.create(title="Many documents with the same title", checksum=f"{i:02}")
|
doc = Document.objects.create(
|
||||||
|
title="Many documents with the same title", checksum=f"{i:02}"
|
||||||
|
)
|
||||||
docs.append(doc)
|
docs.append(doc)
|
||||||
index.add_or_update_document(doc)
|
index.add_or_update_document(doc)
|
||||||
|
|
||||||
@ -59,5 +60,7 @@ class TestDocumentAdmin(DirectoriesMixin, TestCase):
|
|||||||
self.assertIsNone(self.get_document_from_index(doc))
|
self.assertIsNone(self.get_document_from_index(doc))
|
||||||
|
|
||||||
def test_created(self):
|
def test_created(self):
|
||||||
doc = Document.objects.create(title="test", created=timezone.datetime(2020, 4, 12))
|
doc = Document.objects.create(
|
||||||
|
title="test", created=timezone.datetime(2020, 4, 12)
|
||||||
|
)
|
||||||
self.assertEqual(self.doc_admin.created_(doc), "2020-04-12")
|
self.assertEqual(self.doc_admin.created_(doc), "2020-04-12")
|
||||||
|
File diff suppressed because it is too large
@ -11,7 +11,6 @@ from ..models import Document


class ChecksTestCase(TestCase):

    def test_changed_password_check_empty_db(self):
        self.assertEqual(changed_password_check(None), [])

@ -23,8 +22,15 @@ class ChecksTestCase(TestCase):

        self.assertEqual(parser_check(None), [])

        with mock.patch('documents.checks.document_consumer_declaration.send') as m:
        with mock.patch("documents.checks.document_consumer_declaration.send") as m:
            m.return_value = []

            self.assertEqual(parser_check(None), [Error("No parsers found. This is a bug. The consumer won't be "
            self.assertEqual(
                             "able to consume any documents without parsers.")])
                parser_check(None),
                [
                    Error(
                        "No parsers found. This is a bug. The consumer won't be "
                        "able to consume any documents without parsers."
                    )
                ],
            )
@ -7,30 +7,60 @@ import pytest
from django.conf import settings
from django.test import TestCase, override_settings

from documents.classifier import DocumentClassifier, IncompatibleClassifierVersionError, load_classifier
from documents.classifier import (
    DocumentClassifier,
    IncompatibleClassifierVersionError,
    load_classifier,
)
from documents.models import Correspondent, Document, Tag, DocumentType
from documents.tests.utils import DirectoriesMixin


class TestClassifier(DirectoriesMixin, TestCase):

    def setUp(self):
        super(TestClassifier, self).setUp()
        self.classifier = DocumentClassifier()

    def generate_test_data(self):
        self.c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
        self.c1 = Correspondent.objects.create(
            name="c1", matching_algorithm=Correspondent.MATCH_AUTO
        )
        self.c2 = Correspondent.objects.create(name="c2")
        self.c3 = Correspondent.objects.create(name="c3", matching_algorithm=Correspondent.MATCH_AUTO)
        self.c3 = Correspondent.objects.create(
        self.t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
            name="c3", matching_algorithm=Correspondent.MATCH_AUTO
        self.t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True)
        )
        self.t3 = Tag.objects.create(name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45)
        self.t1 = Tag.objects.create(
        self.dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
            name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12
        self.dt2 = DocumentType.objects.create(name="dt2", matching_algorithm=DocumentType.MATCH_AUTO)
        )
        self.t2 = Tag.objects.create(
            name="t2", matching_algorithm=Tag.MATCH_ANY, pk=34, is_inbox_tag=True
        )
        self.t3 = Tag.objects.create(
            name="t3", matching_algorithm=Tag.MATCH_AUTO, pk=45
        )
        self.dt = DocumentType.objects.create(
            name="dt", matching_algorithm=DocumentType.MATCH_AUTO
        )
        self.dt2 = DocumentType.objects.create(
            name="dt2", matching_algorithm=DocumentType.MATCH_AUTO
        )

        self.doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=self.c1, checksum="A", document_type=self.dt)
        self.doc1 = Document.objects.create(
        self.doc2 = Document.objects.create(title="doc1", content="this is another document, but from c2", correspondent=self.c2, checksum="B")
            title="doc1",
        self.doc_inbox = Document.objects.create(title="doc235", content="aa", checksum="C")
            content="this is a document from c1",
            correspondent=self.c1,
            checksum="A",
            document_type=self.dt,
        )
        self.doc2 = Document.objects.create(
            title="doc1",
            content="this is another document, but from c2",
            correspondent=self.c2,
            checksum="B",
        )
        self.doc_inbox = Document.objects.create(
            title="doc235", content="aa", checksum="C"
        )

        self.doc1.tags.add(self.t1)
        self.doc2.tags.add(self.t1)
@ -59,17 +89,29 @@ class TestClassifier(DirectoriesMixin, TestCase):
    def testTrain(self):
        self.generate_test_data()
        self.classifier.train()
        self.assertListEqual(list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk])
        self.assertListEqual(
        self.assertListEqual(list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk])
            list(self.classifier.correspondent_classifier.classes_), [-1, self.c1.pk]
        )
        self.assertListEqual(
            list(self.classifier.tags_binarizer.classes_), [self.t1.pk, self.t3.pk]
        )

    def testPredict(self):
        self.generate_test_data()
        self.classifier.train()
        self.assertEqual(self.classifier.predict_correspondent(self.doc1.content), self.c1.pk)
        self.assertEqual(
            self.classifier.predict_correspondent(self.doc1.content), self.c1.pk
        )
        self.assertEqual(self.classifier.predict_correspondent(self.doc2.content), None)
        self.assertListEqual(self.classifier.predict_tags(self.doc1.content), [self.t1.pk])
        self.assertListEqual(
        self.assertListEqual(self.classifier.predict_tags(self.doc2.content), [self.t1.pk, self.t3.pk])
            self.classifier.predict_tags(self.doc1.content), [self.t1.pk]
        self.assertEqual(self.classifier.predict_document_type(self.doc1.content), self.dt.pk)
        )
        self.assertListEqual(
            self.classifier.predict_tags(self.doc2.content), [self.t1.pk, self.t3.pk]
        )
        self.assertEqual(
            self.classifier.predict_document_type(self.doc1.content), self.dt.pk
        )
        self.assertEqual(self.classifier.predict_document_type(self.doc2.content), None)

    def testDatasetHashing(self):
@ -90,7 +132,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
classifier2 = DocumentClassifier()
|
classifier2 = DocumentClassifier()
|
||||||
|
|
||||||
current_ver = DocumentClassifier.FORMAT_VERSION
|
current_ver = DocumentClassifier.FORMAT_VERSION
|
||||||
with mock.patch("documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver+1):
|
with mock.patch(
|
||||||
|
"documents.classifier.DocumentClassifier.FORMAT_VERSION", current_ver + 1
|
||||||
|
):
|
||||||
# assure that we won't load old classifiers.
|
# assure that we won't load old classifiers.
|
||||||
self.assertRaises(IncompatibleClassifierVersionError, classifier2.load)
|
self.assertRaises(IncompatibleClassifierVersionError, classifier2.load)
|
||||||
|
|
||||||
@ -112,7 +156,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
new_classifier.load()
|
new_classifier.load()
|
||||||
self.assertFalse(new_classifier.train())
|
self.assertFalse(new_classifier.train())
|
||||||
|
|
||||||
@override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"))
|
@override_settings(
|
||||||
|
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")
|
||||||
|
)
|
||||||
def test_load_and_classify(self):
|
def test_load_and_classify(self):
|
||||||
self.generate_test_data()
|
self.generate_test_data()
|
||||||
|
|
||||||
@ -122,38 +168,67 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
self.assertCountEqual(new_classifier.predict_tags(self.doc2.content), [45, 12])
|
self.assertCountEqual(new_classifier.predict_tags(self.doc2.content), [45, 12])
|
||||||
|
|
||||||
def test_one_correspondent_predict(self):
|
def test_one_correspondent_predict(self):
|
||||||
c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
|
c1 = Correspondent.objects.create(
|
||||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=c1, checksum="A")
|
name="c1", matching_algorithm=Correspondent.MATCH_AUTO
|
||||||
|
)
|
||||||
|
doc1 = Document.objects.create(
|
||||||
|
title="doc1",
|
||||||
|
content="this is a document from c1",
|
||||||
|
correspondent=c1,
|
||||||
|
checksum="A",
|
||||||
|
)
|
||||||
|
|
||||||
self.classifier.train()
|
self.classifier.train()
|
||||||
self.assertEqual(self.classifier.predict_correspondent(doc1.content), c1.pk)
|
self.assertEqual(self.classifier.predict_correspondent(doc1.content), c1.pk)
|
||||||
|
|
||||||
def test_one_correspondent_predict_manydocs(self):
|
def test_one_correspondent_predict_manydocs(self):
|
||||||
c1 = Correspondent.objects.create(name="c1", matching_algorithm=Correspondent.MATCH_AUTO)
|
c1 = Correspondent.objects.create(
|
||||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", correspondent=c1, checksum="A")
|
name="c1", matching_algorithm=Correspondent.MATCH_AUTO
|
||||||
doc2 = Document.objects.create(title="doc2", content="this is a document from noone", checksum="B")
|
)
|
||||||
|
doc1 = Document.objects.create(
|
||||||
|
title="doc1",
|
||||||
|
content="this is a document from c1",
|
||||||
|
correspondent=c1,
|
||||||
|
checksum="A",
|
||||||
|
)
|
||||||
|
doc2 = Document.objects.create(
|
||||||
|
title="doc2", content="this is a document from noone", checksum="B"
|
||||||
|
)
|
||||||
|
|
||||||
self.classifier.train()
|
self.classifier.train()
|
||||||
self.assertEqual(self.classifier.predict_correspondent(doc1.content), c1.pk)
|
self.assertEqual(self.classifier.predict_correspondent(doc1.content), c1.pk)
|
||||||
self.assertIsNone(self.classifier.predict_correspondent(doc2.content))
|
self.assertIsNone(self.classifier.predict_correspondent(doc2.content))
|
||||||
|
|
||||||
def test_one_type_predict(self):
|
def test_one_type_predict(self):
|
||||||
dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
|
dt = DocumentType.objects.create(
|
||||||
|
name="dt", matching_algorithm=DocumentType.MATCH_AUTO
|
||||||
|
)
|
||||||
|
|
||||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1",
|
doc1 = Document.objects.create(
|
||||||
checksum="A", document_type=dt)
|
title="doc1",
|
||||||
|
content="this is a document from c1",
|
||||||
|
checksum="A",
|
||||||
|
document_type=dt,
|
||||||
|
)
|
||||||
|
|
||||||
self.classifier.train()
|
self.classifier.train()
|
||||||
self.assertEqual(self.classifier.predict_document_type(doc1.content), dt.pk)
|
self.assertEqual(self.classifier.predict_document_type(doc1.content), dt.pk)
|
||||||
|
|
||||||
def test_one_type_predict_manydocs(self):
|
def test_one_type_predict_manydocs(self):
|
||||||
dt = DocumentType.objects.create(name="dt", matching_algorithm=DocumentType.MATCH_AUTO)
|
dt = DocumentType.objects.create(
|
||||||
|
name="dt", matching_algorithm=DocumentType.MATCH_AUTO
|
||||||
|
)
|
||||||
|
|
||||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1",
|
doc1 = Document.objects.create(
|
||||||
checksum="A", document_type=dt)
|
title="doc1",
|
||||||
|
content="this is a document from c1",
|
||||||
|
checksum="A",
|
||||||
|
document_type=dt,
|
||||||
|
)
|
||||||
|
|
||||||
doc2 = Document.objects.create(title="doc1", content="this is a document from c2",
|
doc2 = Document.objects.create(
|
||||||
checksum="B")
|
title="doc1", content="this is a document from c2", checksum="B"
|
||||||
|
)
|
||||||
|
|
||||||
self.classifier.train()
|
self.classifier.train()
|
||||||
self.assertEqual(self.classifier.predict_document_type(doc1.content), dt.pk)
|
self.assertEqual(self.classifier.predict_document_type(doc1.content), dt.pk)
|
||||||
@ -162,7 +237,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
def test_one_tag_predict(self):
|
def test_one_tag_predict(self):
|
||||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
|
|
||||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
doc1 = Document.objects.create(
|
||||||
|
title="doc1", content="this is a document from c1", checksum="A"
|
||||||
|
)
|
||||||
|
|
||||||
doc1.tags.add(t1)
|
doc1.tags.add(t1)
|
||||||
self.classifier.train()
|
self.classifier.train()
|
||||||
@ -171,7 +248,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
def test_one_tag_predict_unassigned(self):
|
def test_one_tag_predict_unassigned(self):
|
||||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
|
|
||||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
doc1 = Document.objects.create(
|
||||||
|
title="doc1", content="this is a document from c1", checksum="A"
|
||||||
|
)
|
||||||
|
|
||||||
self.classifier.train()
|
self.classifier.train()
|
||||||
self.assertListEqual(self.classifier.predict_tags(doc1.content), [])
|
self.assertListEqual(self.classifier.predict_tags(doc1.content), [])
|
||||||
@ -180,7 +259,9 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
|
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
|
||||||
|
|
||||||
doc4 = Document.objects.create(title="doc1", content="this is a document from c4", checksum="D")
|
doc4 = Document.objects.create(
|
||||||
|
title="doc1", content="this is a document from c4", checksum="D"
|
||||||
|
)
|
||||||
|
|
||||||
doc4.tags.add(t1)
|
doc4.tags.add(t1)
|
||||||
doc4.tags.add(t2)
|
doc4.tags.add(t2)
|
||||||
@ -191,10 +272,18 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
|
t2 = Tag.objects.create(name="t2", matching_algorithm=Tag.MATCH_AUTO, pk=121)
|
||||||
|
|
||||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
doc1 = Document.objects.create(
|
||||||
doc2 = Document.objects.create(title="doc1", content="this is a document from c2", checksum="B")
|
title="doc1", content="this is a document from c1", checksum="A"
|
||||||
doc3 = Document.objects.create(title="doc1", content="this is a document from c3", checksum="C")
|
)
|
||||||
doc4 = Document.objects.create(title="doc1", content="this is a document from c4", checksum="D")
|
doc2 = Document.objects.create(
|
||||||
|
title="doc1", content="this is a document from c2", checksum="B"
|
||||||
|
)
|
||||||
|
doc3 = Document.objects.create(
|
||||||
|
title="doc1", content="this is a document from c3", checksum="C"
|
||||||
|
)
|
||||||
|
doc4 = Document.objects.create(
|
||||||
|
title="doc1", content="this is a document from c4", checksum="D"
|
||||||
|
)
|
||||||
|
|
||||||
doc1.tags.add(t1)
|
doc1.tags.add(t1)
|
||||||
doc2.tags.add(t2)
|
doc2.tags.add(t2)
|
||||||
@ -210,8 +299,12 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
def test_one_tag_predict_multi(self):
|
def test_one_tag_predict_multi(self):
|
||||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
|
|
||||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
doc1 = Document.objects.create(
|
||||||
doc2 = Document.objects.create(title="doc2", content="this is a document from c2", checksum="B")
|
title="doc1", content="this is a document from c1", checksum="A"
|
||||||
|
)
|
||||||
|
doc2 = Document.objects.create(
|
||||||
|
title="doc2", content="this is a document from c2", checksum="B"
|
||||||
|
)
|
||||||
|
|
||||||
doc1.tags.add(t1)
|
doc1.tags.add(t1)
|
||||||
doc2.tags.add(t1)
|
doc2.tags.add(t1)
|
||||||
@ -222,8 +315,12 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
def test_one_tag_predict_multi_2(self):
|
def test_one_tag_predict_multi_2(self):
|
||||||
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
t1 = Tag.objects.create(name="t1", matching_algorithm=Tag.MATCH_AUTO, pk=12)
|
||||||
|
|
||||||
doc1 = Document.objects.create(title="doc1", content="this is a document from c1", checksum="A")
|
doc1 = Document.objects.create(
|
||||||
doc2 = Document.objects.create(title="doc2", content="this is a document from c2", checksum="B")
|
title="doc1", content="this is a document from c1", checksum="A"
|
||||||
|
)
|
||||||
|
doc2 = Document.objects.create(
|
||||||
|
title="doc2", content="this is a document from c2", checksum="B"
|
||||||
|
)
|
||||||
|
|
||||||
doc1.tags.add(t1)
|
doc1.tags.add(t1)
|
||||||
self.classifier.train()
|
self.classifier.train()
|
||||||
@ -240,9 +337,15 @@ class TestClassifier(DirectoriesMixin, TestCase):
|
|||||||
self.assertIsNotNone(load_classifier())
|
self.assertIsNotNone(load_classifier())
|
||||||
load.assert_called_once()
|
load.assert_called_once()
|
||||||
|
|
||||||
@override_settings(CACHES={'default': {'BACKEND': 'django.core.cache.backends.locmem.LocMemCache'}})
|
@override_settings(
|
||||||
@override_settings(MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle"))
|
CACHES={"default": {"BACKEND": "django.core.cache.backends.locmem.LocMemCache"}}
|
||||||
@pytest.mark.skip(reason="Disabled caching due to high memory usage - need to investigate.")
|
)
|
||||||
|
@override_settings(
|
||||||
|
MODEL_FILE=os.path.join(os.path.dirname(__file__), "data", "model.pickle")
|
||||||
|
)
|
||||||
|
@pytest.mark.skip(
|
||||||
|
reason="Disabled caching due to high memory usage - need to investigate."
|
||||||
|
)
|
||||||
def test_load_classifier_cached(self):
|
def test_load_classifier_cached(self):
|
||||||
classifier = load_classifier()
|
classifier = load_classifier()
|
||||||
self.assertIsNotNone(classifier)
|
self.assertIsNotNone(classifier)
|
||||||
|
@ -31,21 +31,14 @@ class TestAttributes(TestCase):
|
|||||||
|
|
||||||
self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename)
|
self.assertEqual(tuple([t.name for t in file_info.tags]), tags, filename)
|
||||||
|
|
||||||
|
|
||||||
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
|
def test_guess_attributes_from_name_when_title_starts_with_dash(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
'- weird but should not break.pdf',
|
"- weird but should not break.pdf", None, "- weird but should not break", ()
|
||||||
None,
|
|
||||||
'- weird but should not break',
|
|
||||||
()
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_guess_attributes_from_name_when_title_ends_with_dash(self):
|
def test_guess_attributes_from_name_when_title_ends_with_dash(self):
|
||||||
self._test_guess_attributes_from_name(
|
self._test_guess_attributes_from_name(
|
||||||
'weird but should not break -.pdf',
|
"weird but should not break -.pdf", None, "weird but should not break -", ()
|
||||||
None,
|
|
||||||
'weird but should not break -',
|
|
||||||
()
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -55,19 +48,13 @@ class TestFieldPermutations(TestCase):
|
|||||||
"20150102030405Z",
|
"20150102030405Z",
|
||||||
"20150102Z",
|
"20150102Z",
|
||||||
)
|
)
|
||||||
valid_correspondents = [
|
valid_correspondents = ["timmy", "Dr. McWheelie", "Dash Gor-don", "ο Θερμαστής", ""]
|
||||||
"timmy",
|
|
||||||
"Dr. McWheelie",
|
|
||||||
"Dash Gor-don",
|
|
||||||
"ο Θερμαστής",
|
|
||||||
""
|
|
||||||
]
|
|
||||||
valid_titles = ["title", "Title w Spaces", "Title a-dash", "Τίτλος", ""]
|
valid_titles = ["title", "Title w Spaces", "Title a-dash", "Τίτλος", ""]
|
||||||
valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]
|
valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]
|
||||||
|
|
||||||
def _test_guessed_attributes(self, filename, created=None,
|
def _test_guessed_attributes(
|
||||||
correspondent=None, title=None,
|
self, filename, created=None, correspondent=None, title=None, tags=None
|
||||||
tags=None):
|
):
|
||||||
|
|
||||||
info = FileInfo.from_filename(filename)
|
info = FileInfo.from_filename(filename)
|
||||||
|
|
||||||
@ -92,13 +79,10 @@ class TestFieldPermutations(TestCase):
|
|||||||
if tags is None:
|
if tags is None:
|
||||||
self.assertEqual(info.tags, (), filename)
|
self.assertEqual(info.tags, (), filename)
|
||||||
else:
|
else:
|
||||||
self.assertEqual(
|
self.assertEqual([t.name for t in info.tags], tags.split(","), filename)
|
||||||
[t.name for t in info.tags], tags.split(','),
|
|
||||||
filename
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_just_title(self):
|
def test_just_title(self):
|
||||||
template = '{title}.pdf'
|
template = "{title}.pdf"
|
||||||
for title in self.valid_titles:
|
for title in self.valid_titles:
|
||||||
spec = dict(title=title)
|
spec = dict(title=title)
|
||||||
filename = template.format(**spec)
|
filename = template.format(**spec)
|
||||||
@ -109,12 +93,8 @@ class TestFieldPermutations(TestCase):
|
|||||||
|
|
||||||
for created in self.valid_dates:
|
for created in self.valid_dates:
|
||||||
for title in self.valid_titles:
|
for title in self.valid_titles:
|
||||||
spec = {
|
spec = {"created": created, "title": title}
|
||||||
"created": created,
|
self._test_guessed_attributes(template.format(**spec), **spec)
|
||||||
"title": title
|
|
||||||
}
|
|
||||||
self._test_guessed_attributes(
|
|
||||||
template.format(**spec), **spec)
|
|
||||||
|
|
||||||
def test_invalid_date_format(self):
|
def test_invalid_date_format(self):
|
||||||
info = FileInfo.from_filename("06112017Z - title.pdf")
|
info = FileInfo.from_filename("06112017Z - title.pdf")
|
||||||
@ -137,14 +117,12 @@ class TestFieldPermutations(TestCase):
|
|||||||
self.assertIsNone(info.created)
|
self.assertIsNone(info.created)
|
||||||
|
|
||||||
# Pattern doesn't match (filename unaltered)
|
# Pattern doesn't match (filename unaltered)
|
||||||
with self.settings(
|
with self.settings(FILENAME_PARSE_TRANSFORMS=[(none_patt, "none.gif")]):
|
||||||
FILENAME_PARSE_TRANSFORMS=[(none_patt, "none.gif")]):
|
|
||||||
info = FileInfo.from_filename(filename)
|
info = FileInfo.from_filename(filename)
|
||||||
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
|
self.assertEqual(info.title, "tag1,tag2_20190908_180610_0001")
|
||||||
|
|
||||||
# Simple transformation (match all)
|
# Simple transformation (match all)
|
||||||
with self.settings(
|
with self.settings(FILENAME_PARSE_TRANSFORMS=[(all_patt, "all.gif")]):
|
||||||
FILENAME_PARSE_TRANSFORMS=[(all_patt, "all.gif")]):
|
|
||||||
info = FileInfo.from_filename(filename)
|
info = FileInfo.from_filename(filename)
|
||||||
self.assertEqual(info.title, "all")
|
self.assertEqual(info.title, "all")
|
||||||
|
|
||||||
@ -152,7 +130,9 @@ class TestFieldPermutations(TestCase):
|
|||||||
with self.settings(
|
with self.settings(
|
||||||
FILENAME_PARSE_TRANSFORMS=[
|
FILENAME_PARSE_TRANSFORMS=[
|
||||||
(all_patt, "all.gif"),
|
(all_patt, "all.gif"),
|
||||||
(all_patt, "anotherall.gif")]):
|
(all_patt, "anotherall.gif"),
|
||||||
|
]
|
||||||
|
):
|
||||||
info = FileInfo.from_filename(filename)
|
info = FileInfo.from_filename(filename)
|
||||||
self.assertEqual(info.title, "all")
|
self.assertEqual(info.title, "all")
|
||||||
|
|
||||||
@ -160,13 +140,14 @@ class TestFieldPermutations(TestCase):
|
|||||||
with self.settings(
|
with self.settings(
|
||||||
FILENAME_PARSE_TRANSFORMS=[
|
FILENAME_PARSE_TRANSFORMS=[
|
||||||
(none_patt, "none.gif"),
|
(none_patt, "none.gif"),
|
||||||
(all_patt, "anotherall.gif")]):
|
(all_patt, "anotherall.gif"),
|
||||||
|
]
|
||||||
|
):
|
||||||
info = FileInfo.from_filename(filename)
|
info = FileInfo.from_filename(filename)
|
||||||
self.assertEqual(info.title, "anotherall")
|
self.assertEqual(info.title, "anotherall")
|
||||||
|
|
||||||
|
|
||||||
class DummyParser(DocumentParser):
|
class DummyParser(DocumentParser):
|
||||||
|
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||||
# not important during tests
|
# not important during tests
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
@ -184,7 +165,6 @@ class DummyParser(DocumentParser):
|
|||||||
|
|
||||||
|
|
||||||
class CopyParser(DocumentParser):
|
class CopyParser(DocumentParser):
|
||||||
|
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||||
return self.fake_thumb
|
return self.fake_thumb
|
||||||
|
|
||||||
@ -202,7 +182,6 @@ class CopyParser(DocumentParser):
|
|||||||
|
|
||||||
|
|
||||||
class FaultyParser(DocumentParser):
|
class FaultyParser(DocumentParser):
|
||||||
|
|
||||||
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
def get_thumbnail(self, document_path, mime_type, file_name=None):
|
||||||
# not important during tests
|
# not important during tests
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
@ -233,8 +212,15 @@ def fake_magic_from_file(file, mime=False):
|
|||||||
|
|
||||||
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
|
@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
|
||||||
class TestConsumer(DirectoriesMixin, TestCase):
|
class TestConsumer(DirectoriesMixin, TestCase):
|
||||||
|
def _assert_first_last_send_progress(
|
||||||
def _assert_first_last_send_progress(self, first_status="STARTING", last_status="SUCCESS", first_progress=0, first_progress_max=100, last_progress=100, last_progress_max=100):
|
self,
|
||||||
|
first_status="STARTING",
|
||||||
|
last_status="SUCCESS",
|
||||||
|
first_progress=0,
|
||||||
|
first_progress_max=100,
|
||||||
|
last_progress=100,
|
||||||
|
last_progress_max=100,
|
||||||
|
):
|
||||||
|
|
||||||
self._send_progress.assert_called()
|
self._send_progress.assert_called()
|
||||||
|
|
||||||
@ -243,13 +229,17 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
|||||||
self.assertEqual(args[1], first_progress_max)
|
self.assertEqual(args[1], first_progress_max)
|
||||||
self.assertEqual(args[2], first_status)
|
self.assertEqual(args[2], first_status)
|
||||||
|
|
||||||
args, kwargs = self._send_progress.call_args_list[len(self._send_progress.call_args_list) - 1]
|
args, kwargs = self._send_progress.call_args_list[
|
||||||
|
len(self._send_progress.call_args_list) - 1
|
||||||
|
]
|
||||||
self.assertEqual(args[0], last_progress)
|
self.assertEqual(args[0], last_progress)
|
||||||
self.assertEqual(args[1], last_progress_max)
|
self.assertEqual(args[1], last_progress_max)
|
||||||
self.assertEqual(args[2], last_status)
|
self.assertEqual(args[2], last_status)
|
||||||
|
|
||||||
def make_dummy_parser(self, logging_group, progress_callback=None):
|
def make_dummy_parser(self, logging_group, progress_callback=None):
|
||||||
return DummyParser(logging_group, self.dirs.scratch_dir, self.get_test_archive_file())
|
return DummyParser(
|
||||||
|
logging_group, self.dirs.scratch_dir, self.get_test_archive_file()
|
||||||
|
)
|
||||||
|
|
||||||
def make_faulty_parser(self, logging_group, progress_callback=None):
|
def make_faulty_parser(self, logging_group, progress_callback=None):
|
||||||
return FaultyParser(logging_group, self.dirs.scratch_dir)
|
return FaultyParser(logging_group, self.dirs.scratch_dir)
|
||||||
@ -259,11 +249,16 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
|
patcher = mock.patch("documents.parsers.document_consumer_declaration.send")
|
||||||
m = patcher.start()
|
m = patcher.start()
|
||||||
m.return_value = [(None, {
|
m.return_value = [
|
||||||
|
(
|
||||||
|
None,
|
||||||
|
{
|
||||||
"parser": self.make_dummy_parser,
|
"parser": self.make_dummy_parser,
|
||||||
"mime_types": {"application/pdf": ".pdf"},
|
"mime_types": {"application/pdf": ".pdf"},
|
||||||
"weight": 0
|
"weight": 0,
|
||||||
})]
|
},
|
||||||
|
)
|
||||||
|
]
|
||||||
self.addCleanup(patcher.stop)
|
self.addCleanup(patcher.stop)
|
||||||
|
|
||||||
# this prevents websocket message reports during testing.
|
# this prevents websocket message reports during testing.
|
||||||
@ -274,13 +269,21 @@ class TestConsumer(DirectoriesMixin, TestCase):
|
|||||||
self.consumer = Consumer()
|
self.consumer = Consumer()
|
||||||
|
|
||||||
def get_test_file(self):
|
def get_test_file(self):
|
||||||
src = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000001.pdf")
|
src = os.path.join(
|
||||||
|
os.path.dirname(__file__),
|
||||||
|
"samples",
|
||||||
|
"documents",
|
||||||
|
"originals",
|
||||||
|
"0000001.pdf",
|
||||||
|
)
|
||||||
dst = os.path.join(self.dirs.scratch_dir, "sample.pdf")
|
dst = os.path.join(self.dirs.scratch_dir, "sample.pdf")
|
||||||
shutil.copy(src, dst)
|
shutil.copy(src, dst)
|
||||||
return dst
|
return dst
|
||||||
|
|
||||||
def get_test_archive_file(self):
|
def get_test_archive_file(self):
|
||||||
src = os.path.join(os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf")
|
src = os.path.join(
|
||||||
|
os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"
|
||||||
|
)
|
||||||
dst = os.path.join(self.dirs.scratch_dir, "sample_archive.pdf")
|
dst = os.path.join(self.dirs.scratch_dir, "sample_archive.pdf")
|
||||||
shutil.copy(src, dst)
|
shutil.copy(src, dst)
|
||||||
return dst
|
return dst
|
||||||
@ -292,23 +295,19 @@ class TestConsumer(DirectoriesMixin, TestCase):
document = self.consumer.try_consume_file(filename)

self.assertEqual(document.content, "The Text")
self.assertEqual(document.title, os.path.splitext(os.path.basename(filename))[0])
self.assertEqual(
document.title, os.path.splitext(os.path.basename(filename))[0]
)
self.assertIsNone(document.correspondent)
self.assertIsNone(document.document_type)
self.assertEqual(document.filename, "0000001.pdf")
self.assertEqual(document.archive_filename, "0000001.pdf")

self.assertTrue(os.path.isfile(
document.source_path
))
self.assertTrue(os.path.isfile(document.source_path))

self.assertTrue(os.path.isfile(
document.thumbnail_path
))
self.assertTrue(os.path.isfile(document.thumbnail_path))

self.assertTrue(os.path.isfile(
document.archive_path
))
self.assertTrue(os.path.isfile(document.archive_path))

self.assertEqual(document.checksum, "42995833e01aea9b3edee44bbfdd7ce1")
self.assertEqual(document.archive_checksum, "62acb0bcbfbcaa62ca6ad3668e4e404b")
@ -330,40 +329,45 @@ class TestConsumer(DirectoriesMixin, TestCase):

document = self.consumer.try_consume_file(filename)

self.assertTrue(os.path.isfile(
document.source_path
))
self.assertTrue(os.path.isfile(document.source_path))

self.assertFalse(os.path.isfile(shadow_file))
self.assertFalse(os.path.isfile(filename))


def testOverrideFilename(self):
filename = self.get_test_file()
override_filename = "Statement for November.pdf"

document = self.consumer.try_consume_file(filename, override_filename=override_filename)
document = self.consumer.try_consume_file(
filename, override_filename=override_filename
)

self.assertEqual(document.title, "Statement for November")

self._assert_first_last_send_progress()

def testOverrideTitle(self):
document = self.consumer.try_consume_file(self.get_test_file(), override_title="Override Title")
document = self.consumer.try_consume_file(
self.get_test_file(), override_title="Override Title"
)
self.assertEqual(document.title, "Override Title")
self._assert_first_last_send_progress()

def testOverrideCorrespondent(self):
c = Correspondent.objects.create(name="test")

document = self.consumer.try_consume_file(self.get_test_file(), override_correspondent_id=c.pk)
document = self.consumer.try_consume_file(
self.get_test_file(), override_correspondent_id=c.pk
)
self.assertEqual(document.correspondent.id, c.id)
self._assert_first_last_send_progress()

def testOverrideDocumentType(self):
dt = DocumentType.objects.create(name="test")

document = self.consumer.try_consume_file(self.get_test_file(), override_document_type_id=dt.pk)
document = self.consumer.try_consume_file(
self.get_test_file(), override_document_type_id=dt.pk
)
self.assertEqual(document.document_type.id, dt.id)
self._assert_first_last_send_progress()

@ -371,7 +375,9 @@ class TestConsumer(DirectoriesMixin, TestCase):
t1 = Tag.objects.create(name="t1")
t2 = Tag.objects.create(name="t2")
t3 = Tag.objects.create(name="t3")
document = self.consumer.try_consume_file(self.get_test_file(), override_tag_ids=[t1.id, t3.id])
document = self.consumer.try_consume_file(
self.get_test_file(), override_tag_ids=[t1.id, t3.id]
)

self.assertIn(t1, document.tags.all())
self.assertNotIn(t2, document.tags.all())
@ -384,7 +390,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
ConsumerError,
"File not found",
self.consumer.try_consume_file,
"non-existing-file"
"non-existing-file",
)

self._assert_first_last_send_progress(last_status="FAILED")
@ -396,7 +402,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
ConsumerError,
"It is a duplicate",
self.consumer.try_consume_file,
self.get_test_file()
self.get_test_file(),
)

self._assert_first_last_send_progress(last_status="FAILED")
@ -408,7 +414,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
ConsumerError,
"It is a duplicate",
self.consumer.try_consume_file,
self.get_test_archive_file()
self.get_test_archive_file(),
)

self._assert_first_last_send_progress(last_status="FAILED")
@ -425,25 +431,29 @@ class TestConsumer(DirectoriesMixin, TestCase):
ConsumerError,
"sample.pdf: Unsupported mime type application/pdf",
self.consumer.try_consume_file,
self.get_test_file()
self.get_test_file(),
)

self._assert_first_last_send_progress(last_status="FAILED")


@mock.patch("documents.parsers.document_consumer_declaration.send")
def testFaultyParser(self, m):
m.return_value = [(None, {
m.return_value = [
(
None,
{
"parser": self.make_faulty_parser,
"mime_types": {"application/pdf": ".pdf"},
"weight": 0
})]
"weight": 0,
},
)
]

self.assertRaisesMessage(
ConsumerError,
"sample.pdf: Error while consuming document sample.pdf: Does not compute.",
self.consumer.try_consume_file,
self.get_test_file()
self.get_test_file(),
)

self._assert_first_last_send_progress(last_status="FAILED")
@ -457,7 +467,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
ConsumerError,
"sample.pdf: The following error occured while consuming sample.pdf: NO.",
self.consumer.try_consume_file,
filename
filename,
)

self._assert_first_last_send_progress(last_status="FAILED")
@ -491,7 +501,7 @@ class TestConsumer(DirectoriesMixin, TestCase):
filenames.insert(0, f)
return f

m.side_effect = lambda f, archive_filename = False: get_filename()
m.side_effect = lambda f, archive_filename=False: get_filename()

filename = self.get_test_file()

@ -565,17 +575,37 @@ class TestConsumer(DirectoriesMixin, TestCase):
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
@mock.patch("documents.parsers.document_consumer_declaration.send")
def test_similar_filenames(self, m):
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png"), os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))
m.return_value = [(None, {
shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"),
)
shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "simple.png"),
os.path.join(settings.CONSUMPTION_DIR, "simple.png"),
)
shutil.copy(
os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png"),
os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"),
)
m.return_value = [
(
None,
{
"parser": CopyParser,
"mime_types": {"application/pdf": ".pdf", "image/png": ".png"},
"weight": 0
})]
doc1 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png"))
doc2 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.pdf"))
doc3 = self.consumer.try_consume_file(os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf"))
"weight": 0,
},
)
]
doc1 = self.consumer.try_consume_file(
os.path.join(settings.CONSUMPTION_DIR, "simple.png")
)
doc2 = self.consumer.try_consume_file(
os.path.join(settings.CONSUMPTION_DIR, "simple.pdf")
)
doc3 = self.consumer.try_consume_file(
os.path.join(settings.CONSUMPTION_DIR, "simple.png.pdf")
)

self.assertEqual(doc1.filename, "simple.png")
self.assertEqual(doc1.archive_filename, "simple.pdf")
@ -588,7 +618,6 @@ class TestConsumer(DirectoriesMixin, TestCase):


class PreConsumeTestCase(TestCase):

@mock.patch("documents.consumer.Popen")
@override_settings(PRE_CONSUME_SCRIPT=None)
def test_no_pre_consume_script(self, m):
@ -625,7 +654,6 @@ class PreConsumeTestCase(TestCase):


class PostConsumeTestCase(TestCase):

@mock.patch("documents.consumer.Popen")
@override_settings(POST_CONSUME_SCRIPT=None)
def test_no_post_consume_script(self, m):
@ -662,7 +690,9 @@ class PostConsumeTestCase(TestCase):
with tempfile.NamedTemporaryFile() as script:
with override_settings(POST_CONSUME_SCRIPT=script.name):
c = Correspondent.objects.create(name="my_bank")
doc = Document.objects.create(title="Test", mime_type="application/pdf", correspondent=c)
doc = Document.objects.create(
title="Test", mime_type="application/pdf", correspondent=c
)
tag1 = Tag.objects.create(name="a")
tag2 = Tag.objects.create(name="b")
doc.tags.add(tag1)
@ -12,7 +12,9 @@ from documents.parsers import parse_date

class TestDate(TestCase):

SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "../../paperless_tesseract/tests/samples")
SAMPLE_FILES = os.path.join(
os.path.dirname(__file__), "../../paperless_tesseract/tests/samples"
)
SCRATCH = "/tmp/paperless-tests-{}".format(str(uuid4())[:8])

def setUp(self):
@ -38,24 +40,15 @@ class TestDate(TestCase):
date = parse_date("", text)
self.assertEqual(
date,
datetime.datetime(
2018, 2, 13, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)

def test_date_format_5(self):
text = (
"lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem "
"ipsum"
)
text = "lorem ipsum 130218, 2018, 20180213 and lorem 13.02.2018 lorem " "ipsum"
date = parse_date("", text)
self.assertEqual(
date,
datetime.datetime(
2018, 2, 13, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)

def test_date_format_6(self):
@ -73,18 +66,11 @@ class TestDate(TestCase):
self.assertEqual(parse_date("", text), None)

def test_date_format_7(self):
text = (
"lorem ipsum\n"
"März 2019\n"
"lorem ipsum"
)
text = "lorem ipsum\n" "März 2019\n" "lorem ipsum"
date = parse_date("", text)
self.assertEqual(
date,
datetime.datetime(
2019, 3, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
datetime.datetime(2019, 3, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)

def test_date_format_8(self):
@ -102,26 +88,15 @@ class TestDate(TestCase):
)
self.assertEqual(
parse_date("", text),
datetime.datetime(
2020, 3, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
datetime.datetime(2020, 3, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)

@override_settings(SCRATCH_DIR=SCRATCH)
def test_date_format_9(self):
text = (
"lorem ipsum\n"
"27. Nullmonth 2020\n"
"März 2020\n"
"lorem ipsum"
)
text = "lorem ipsum\n" "27. Nullmonth 2020\n" "März 2020\n" "lorem ipsum"
self.assertEqual(
parse_date("", text),
datetime.datetime(
2020, 3, 1, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
datetime.datetime(2020, 3, 1, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)

def test_crazy_date_past(self, *args):
@ -135,19 +110,17 @@ class TestDate(TestCase):

@override_settings(FILENAME_DATE_ORDER="YMD")
def test_filename_date_parse_invalid(self, *args):
self.assertIsNone(parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here"))
@override_settings(IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17)))
def test_ignored_dates(self, *args):
text = (
"lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem "
"ipsum"
)
self.assertIsNone(
parse_date("/tmp/20 408000l 2475 - test.pdf", "No date in here")
)

@override_settings(
IGNORE_DATES=(datetime.date(2019, 11, 3), datetime.date(2020, 1, 17))
)
def test_ignored_dates(self, *args):
text = "lorem ipsum 110319, 20200117 and lorem 13.02.2018 lorem " "ipsum"
date = parse_date("", text)
self.assertEqual(
date,
datetime.datetime(
2018, 2, 13, 0, 0,
tzinfo=tz.gettz(settings.TIME_ZONE)
)
datetime.datetime(2018, 2, 13, 0, 0, tzinfo=tz.gettz(settings.TIME_ZONE)),
)
@ -10,7 +10,6 @@ from ..models import Document, Correspondent
|
|||||||
|
|
||||||
|
|
||||||
class TestDocument(TestCase):
|
class TestDocument(TestCase):
|
||||||
|
|
||||||
def setUp(self) -> None:
|
def setUp(self) -> None:
|
||||||
self.originals_dir = tempfile.mkdtemp()
|
self.originals_dir = tempfile.mkdtemp()
|
||||||
self.thumb_dir = tempfile.mkdtemp()
|
self.thumb_dir = tempfile.mkdtemp()
|
||||||
@ -30,7 +29,7 @@ class TestDocument(TestCase):
|
|||||||
title="Title",
|
title="Title",
|
||||||
content="content",
|
content="content",
|
||||||
checksum="checksum",
|
checksum="checksum",
|
||||||
mime_type="application/pdf"
|
mime_type="application/pdf",
|
||||||
)
|
)
|
||||||
|
|
||||||
file_path = document.source_path
|
file_path = document.source_path
|
||||||
@ -47,20 +46,36 @@ class TestDocument(TestCase):
|
|||||||
|
|
||||||
def test_file_name(self):
|
def test_file_name(self):
|
||||||
|
|
||||||
doc = Document(mime_type="application/pdf", title="test", created=timezone.datetime(2020, 12, 25))
|
doc = Document(
|
||||||
|
mime_type="application/pdf",
|
||||||
|
title="test",
|
||||||
|
created=timezone.datetime(2020, 12, 25),
|
||||||
|
)
|
||||||
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")
|
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.pdf")
|
||||||
|
|
||||||
def test_file_name_jpg(self):
|
def test_file_name_jpg(self):
|
||||||
|
|
||||||
doc = Document(mime_type="image/jpeg", title="test", created=timezone.datetime(2020, 12, 25))
|
doc = Document(
|
||||||
|
mime_type="image/jpeg",
|
||||||
|
title="test",
|
||||||
|
created=timezone.datetime(2020, 12, 25),
|
||||||
|
)
|
||||||
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg")
|
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.jpg")
|
||||||
|
|
||||||
def test_file_name_unknown(self):
|
def test_file_name_unknown(self):
|
||||||
|
|
||||||
doc = Document(mime_type="application/zip", title="test", created=timezone.datetime(2020, 12, 25))
|
doc = Document(
|
||||||
|
mime_type="application/zip",
|
||||||
|
title="test",
|
||||||
|
created=timezone.datetime(2020, 12, 25),
|
||||||
|
)
|
||||||
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.zip")
|
self.assertEqual(doc.get_public_filename(), "2020-12-25 test.zip")
|
||||||
|
|
||||||
def test_file_name_invalid_type(self):
|
def test_file_name_invalid_type(self):
|
||||||
|
|
||||||
doc = Document(mime_type="image/jpegasd", title="test", created=timezone.datetime(2020, 12, 25))
|
doc = Document(
|
||||||
|
mime_type="image/jpegasd",
|
||||||
|
title="test",
|
||||||
|
created=timezone.datetime(2020, 12, 25),
|
||||||
|
)
|
||||||
self.assertEqual(doc.get_public_filename(), "2020-12-25 test")
|
self.assertEqual(doc.get_public_filename(), "2020-12-25 test")
|
||||||
|
@ -13,13 +13,16 @@ from django.test import TestCase, override_settings
|
|||||||
from django.utils import timezone
|
from django.utils import timezone
|
||||||
|
|
||||||
from .utils import DirectoriesMixin
|
from .utils import DirectoriesMixin
|
||||||
from ..file_handling import generate_filename, create_source_path_directory, delete_empty_directories, \
|
from ..file_handling import (
|
||||||
generate_unique_filename
|
generate_filename,
|
||||||
|
create_source_path_directory,
|
||||||
|
delete_empty_directories,
|
||||||
|
generate_unique_filename,
|
||||||
|
)
|
||||||
from ..models import Document, Correspondent, Tag, DocumentType
|
from ..models import Document, Correspondent, Tag, DocumentType
|
||||||
|
|
||||||
|
|
||||||
class TestFileHandling(DirectoriesMixin, TestCase):
|
class TestFileHandling(DirectoriesMixin, TestCase):
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||||
def test_generate_source_filename(self):
|
def test_generate_source_filename(self):
|
||||||
document = Document()
|
document = Document()
|
||||||
@ -30,8 +33,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
self.assertEqual(generate_filename(document), "{:07d}.pdf".format(document.pk))
|
self.assertEqual(generate_filename(document), "{:07d}.pdf".format(document.pk))
|
||||||
|
|
||||||
document.storage_type = Document.STORAGE_TYPE_GPG
|
document.storage_type = Document.STORAGE_TYPE_GPG
|
||||||
self.assertEqual(generate_filename(document),
|
self.assertEqual(
|
||||||
"{:07d}.pdf.gpg".format(document.pk))
|
generate_filename(document), "{:07d}.pdf.gpg".format(document.pk)
|
||||||
|
)
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||||
def test_file_renaming(self):
|
def test_file_renaming(self):
|
||||||
@ -41,7 +45,10 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
# Test default source_path
|
# Test default source_path
|
||||||
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/{:07d}.pdf".format(document.pk))
|
self.assertEqual(
|
||||||
|
document.source_path,
|
||||||
|
settings.ORIGINALS_DIR + "/{:07d}.pdf".format(document.pk),
|
||||||
|
)
|
||||||
|
|
||||||
document.filename = generate_filename(document)
|
document.filename = generate_filename(document)
|
||||||
|
|
||||||
@ -51,8 +58,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
# Enable encryption and check again
|
# Enable encryption and check again
|
||||||
document.storage_type = Document.STORAGE_TYPE_GPG
|
document.storage_type = Document.STORAGE_TYPE_GPG
|
||||||
document.filename = generate_filename(document)
|
document.filename = generate_filename(document)
|
||||||
self.assertEqual(document.filename,
|
self.assertEqual(document.filename, "none/none.pdf.gpg")
|
||||||
"none/none.pdf.gpg")
|
|
||||||
|
|
||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
@ -68,7 +74,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
# Check proper handling of files
|
# Check proper handling of files
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/test"), True)
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True)
|
self.assertEqual(
|
||||||
|
os.path.isfile(settings.ORIGINALS_DIR + "/test/test.pdf.gpg"), True
|
||||||
|
)
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||||
def test_file_renaming_missing_permissions(self):
|
def test_file_renaming_missing_permissions(self):
|
||||||
@ -79,13 +87,14 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
# Ensure that filename is properly generated
|
# Ensure that filename is properly generated
|
||||||
document.filename = generate_filename(document)
|
document.filename = generate_filename(document)
|
||||||
self.assertEqual(document.filename,
|
self.assertEqual(document.filename, "none/none.pdf")
|
||||||
"none/none.pdf")
|
|
||||||
create_source_path_directory(document.source_path)
|
create_source_path_directory(document.source_path)
|
||||||
Path(document.source_path).touch()
|
Path(document.source_path).touch()
|
||||||
|
|
||||||
# Test source_path
|
# Test source_path
|
||||||
self.assertEqual(document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf")
|
self.assertEqual(
|
||||||
|
document.source_path, settings.ORIGINALS_DIR + "/none/none.pdf"
|
||||||
|
)
|
||||||
|
|
||||||
# Make the folder read- and execute-only (no writing and no renaming)
|
# Make the folder read- and execute-only (no writing and no renaming)
|
||||||
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
|
os.chmod(settings.ORIGINALS_DIR + "/none", 0o555)
|
||||||
@ -95,7 +104,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
# Check proper handling of files
|
# Check proper handling of files
|
||||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
|
self.assertEqual(
|
||||||
|
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True
|
||||||
|
)
|
||||||
self.assertEqual(document.filename, "none/none.pdf")
|
self.assertEqual(document.filename, "none/none.pdf")
|
||||||
|
|
||||||
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
|
os.chmod(settings.ORIGINALS_DIR + "/none", 0o777)
|
||||||
@ -103,7 +114,11 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||||
def test_file_renaming_database_error(self):
|
def test_file_renaming_database_error(self):
|
||||||
|
|
||||||
document1 = Document.objects.create(mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA")
|
document1 = Document.objects.create(
|
||||||
|
mime_type="application/pdf",
|
||||||
|
storage_type=Document.STORAGE_TYPE_UNENCRYPTED,
|
||||||
|
checksum="AAAAA",
|
||||||
|
)
|
||||||
|
|
||||||
document = Document()
|
document = Document()
|
||||||
document.mime_type = "application/pdf"
|
document.mime_type = "application/pdf"
|
||||||
@ -113,8 +128,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
# Ensure that filename is properly generated
|
# Ensure that filename is properly generated
|
||||||
document.filename = generate_filename(document)
|
document.filename = generate_filename(document)
|
||||||
self.assertEqual(document.filename,
|
self.assertEqual(document.filename, "none/none.pdf")
|
||||||
"none/none.pdf")
|
|
||||||
create_source_path_directory(document.source_path)
|
create_source_path_directory(document.source_path)
|
||||||
Path(document.source_path).touch()
|
Path(document.source_path).touch()
|
||||||
|
|
||||||
@ -122,8 +136,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
self.assertTrue(os.path.isfile(document.source_path))
|
self.assertTrue(os.path.isfile(document.source_path))
|
||||||
|
|
||||||
# Set a correspondent and save the document
|
# Set a correspondent and save the document
|
||||||
document.correspondent = Correspondent.objects.get_or_create(
|
document.correspondent = Correspondent.objects.get_or_create(name="test")[0]
|
||||||
name="test")[0]
|
|
||||||
|
|
||||||
with mock.patch("documents.signals.handlers.Document.objects.filter") as m:
|
with mock.patch("documents.signals.handlers.Document.objects.filter") as m:
|
||||||
m.side_effect = DatabaseError()
|
m.side_effect = DatabaseError()
|
||||||
@ -131,7 +144,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
# Check proper handling of files
|
# Check proper handling of files
|
||||||
self.assertTrue(os.path.isfile(document.source_path))
|
self.assertTrue(os.path.isfile(document.source_path))
|
||||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True)
|
self.assertEqual(
|
||||||
|
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), True
|
||||||
|
)
|
||||||
self.assertEqual(document.filename, "none/none.pdf")
|
self.assertEqual(document.filename, "none/none.pdf")
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
|
||||||
@ -143,8 +158,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
# Ensure that filename is properly generated
|
# Ensure that filename is properly generated
|
||||||
document.filename = generate_filename(document)
|
document.filename = generate_filename(document)
|
||||||
self.assertEqual(document.filename,
|
self.assertEqual(document.filename, "none/none.pdf")
|
||||||
"none/none.pdf")
|
|
||||||
|
|
||||||
create_source_path_directory(document.source_path)
|
create_source_path_directory(document.source_path)
|
||||||
Path(document.source_path).touch()
|
Path(document.source_path).touch()
|
||||||
@ -152,10 +166,15 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
# Ensure file deletion after delete
|
# Ensure file deletion after delete
|
||||||
pk = document.pk
|
pk = document.pk
|
||||||
document.delete()
|
document.delete()
|
||||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False)
|
self.assertEqual(
|
||||||
|
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False
|
||||||
|
)
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}", TRASH_DIR=tempfile.mkdtemp())
|
@override_settings(
|
||||||
|
PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}",
|
||||||
|
TRASH_DIR=tempfile.mkdtemp(),
|
||||||
|
)
|
||||||
def test_document_delete_trash(self):
|
def test_document_delete_trash(self):
|
||||||
document = Document()
|
document = Document()
|
||||||
document.mime_type = "application/pdf"
|
document.mime_type = "application/pdf"
|
||||||
@ -164,8 +183,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
# Ensure that filename is properly generated
|
# Ensure that filename is properly generated
|
||||||
document.filename = generate_filename(document)
|
document.filename = generate_filename(document)
|
||||||
self.assertEqual(document.filename,
|
self.assertEqual(document.filename, "none/none.pdf")
|
||||||
"none/none.pdf")
|
|
||||||
|
|
||||||
create_source_path_directory(document.source_path)
|
create_source_path_directory(document.source_path)
|
||||||
Path(document.source_path).touch()
|
Path(document.source_path).touch()
|
||||||
@ -173,7 +191,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
# Ensure file was moved to trash after delete
|
# Ensure file was moved to trash after delete
|
||||||
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none/none.pdf"), False)
|
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none/none.pdf"), False)
|
||||||
document.delete()
|
document.delete()
|
||||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False)
|
self.assertEqual(
|
||||||
|
os.path.isfile(settings.ORIGINALS_DIR + "/none/none.pdf"), False
|
||||||
|
)
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||||
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none.pdf"), True)
|
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none.pdf"), True)
|
||||||
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none_01.pdf"), False)
|
self.assertEqual(os.path.isfile(settings.TRASH_DIR + "/none_01.pdf"), False)
|
||||||
@ -207,8 +227,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
# Ensure that filename is properly generated
|
# Ensure that filename is properly generated
|
||||||
document.filename = generate_filename(document)
|
document.filename = generate_filename(document)
|
||||||
self.assertEqual(document.filename,
|
self.assertEqual(document.filename, "none/none.pdf")
|
||||||
"none/none.pdf")
|
|
||||||
|
|
||||||
create_source_path_directory(document.source_path)
|
create_source_path_directory(document.source_path)
|
||||||
|
|
||||||
@ -238,8 +257,18 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{asn} - {title}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{asn} - {title}")
|
||||||
def test_asn(self):
|
def test_asn(self):
|
||||||
d1 = Document.objects.create(title="the_doc", mime_type="application/pdf", archive_serial_number=652, checksum="A")
|
d1 = Document.objects.create(
|
||||||
d2 = Document.objects.create(title="the_doc", mime_type="application/pdf", archive_serial_number=None, checksum="B")
|
title="the_doc",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
archive_serial_number=652,
|
||||||
|
checksum="A",
|
||||||
|
)
|
||||||
|
d2 = Document.objects.create(
|
||||||
|
title="the_doc",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
archive_serial_number=None,
|
||||||
|
checksum="B",
|
||||||
|
)
|
||||||
self.assertEqual(generate_filename(d1), "652 - the_doc.pdf")
|
self.assertEqual(generate_filename(d1), "652 - the_doc.pdf")
|
||||||
self.assertEqual(generate_filename(d2), "none - the_doc.pdf")
|
self.assertEqual(generate_filename(d2), "none - the_doc.pdf")
|
||||||
|
|
||||||
@ -256,8 +285,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
# Ensure that filename is properly generated
|
# Ensure that filename is properly generated
|
||||||
self.assertEqual(generate_filename(document),
|
self.assertEqual(generate_filename(document), "demo.pdf")
|
||||||
"demo.pdf")
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||||
def test_tags_with_dash(self):
|
def test_tags_with_dash(self):
|
||||||
@ -272,8 +300,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
# Ensure that filename is properly generated
|
# Ensure that filename is properly generated
|
||||||
self.assertEqual(generate_filename(document),
|
self.assertEqual(generate_filename(document), "demo.pdf")
|
||||||
"demo.pdf")
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
|
||||||
def test_tags_malformed(self):
|
def test_tags_malformed(self):
|
||||||
@ -288,8 +315,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
# Ensure that filename is properly generated
|
# Ensure that filename is properly generated
|
||||||
self.assertEqual(generate_filename(document),
|
self.assertEqual(generate_filename(document), "none.pdf")
|
||||||
"none.pdf")
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
|
||||||
def test_tags_all(self):
|
def test_tags_all(self):
|
||||||
@ -303,8 +329,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
# Ensure that filename is properly generated
|
# Ensure that filename is properly generated
|
||||||
self.assertEqual(generate_filename(document),
|
self.assertEqual(generate_filename(document), "demo.pdf")
|
||||||
"demo.pdf")
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
|
||||||
def test_tags_out_of_bounds(self):
|
def test_tags_out_of_bounds(self):
|
||||||
@ -318,8 +343,7 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
document.save()
|
document.save()
|
||||||
|
|
||||||
# Ensure that filename is properly generated
|
# Ensure that filename is properly generated
|
||||||
self.assertEqual(generate_filename(document),
|
self.assertEqual(generate_filename(document), "none.pdf")
|
||||||
"none.pdf")
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{tags}")
|
||||||
def test_tags_without_args(self):
|
def test_tags_without_args(self):
|
||||||
@ -338,7 +362,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf")
|
self.assertEqual(generate_filename(doc), "doc1 tag1,tag2.pdf")
|
||||||
|
|
||||||
doc = Document.objects.create(title="doc2", checksum="B", mime_type="application/pdf")
|
doc = Document.objects.create(
|
||||||
|
title="doc2", checksum="B", mime_type="application/pdf"
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(generate_filename(doc), "doc2.pdf")
|
self.assertEqual(generate_filename(doc), "doc2.pdf")
|
||||||
|
|
||||||
@ -348,12 +374,19 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
doc.filename = generate_filename(doc)
|
doc.filename = generate_filename(doc)
|
||||||
doc.save()
|
doc.save()
|
||||||
|
|
||||||
self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "etc", "something", "doc1.pdf"))
|
self.assertEqual(
|
||||||
|
doc.source_path,
|
||||||
|
os.path.join(settings.ORIGINALS_DIR, "etc", "something", "doc1.pdf"),
|
||||||
|
)
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}")
|
@override_settings(
|
||||||
|
PAPERLESS_FILENAME_FORMAT="{created_year}-{created_month}-{created_day}"
|
||||||
|
)
|
||||||
def test_created_year_month_day(self):
|
def test_created_year_month_day(self):
|
||||||
d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1))
|
d1 = timezone.make_aware(datetime.datetime(2020, 3, 6, 1, 1, 1))
|
||||||
doc1 = Document.objects.create(title="doc1", mime_type="application/pdf", created=d1)
|
doc1 = Document.objects.create(
|
||||||
|
title="doc1", mime_type="application/pdf", created=d1
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(generate_filename(doc1), "2020-03-06.pdf")
|
self.assertEqual(generate_filename(doc1), "2020-03-06.pdf")
|
||||||
|
|
||||||
@ -361,10 +394,14 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
|
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}")
|
@override_settings(
|
||||||
|
PAPERLESS_FILENAME_FORMAT="{added_year}-{added_month}-{added_day}"
|
||||||
|
)
|
||||||
def test_added_year_month_day(self):
|
def test_added_year_month_day(self):
|
||||||
d1 = timezone.make_aware(datetime.datetime(232, 1, 9, 1, 1, 1))
|
d1 = timezone.make_aware(datetime.datetime(232, 1, 9, 1, 1, 1))
|
||||||
doc1 = Document.objects.create(title="doc1", mime_type="application/pdf", added=d1)
|
doc1 = Document.objects.create(
|
||||||
|
title="doc1", mime_type="application/pdf", added=d1
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(generate_filename(doc1), "232-01-09.pdf")
|
self.assertEqual(generate_filename(doc1), "232-01-09.pdf")
|
||||||
|
|
||||||
@ -372,7 +409,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
|
self.assertEqual(generate_filename(doc1), "2020-11-16.pdf")
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
|
@override_settings(
|
||||||
|
PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}"
|
||||||
|
)
|
||||||
def test_nested_directory_cleanup(self):
|
def test_nested_directory_cleanup(self):
|
||||||
document = Document()
|
document = Document()
|
||||||
document.mime_type = "application/pdf"
|
document.mime_type = "application/pdf"
|
||||||
@ -391,7 +430,9 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
pk = document.pk
|
pk = document.pk
|
||||||
document.delete()
|
document.delete()
|
||||||
|
|
||||||
self.assertEqual(os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False)
|
self.assertEqual(
|
||||||
|
os.path.isfile(settings.ORIGINALS_DIR + "/none/none/none.pdf"), False
|
||||||
|
)
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none/none"), False)
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR + "/none"), False)
|
||||||
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
|
self.assertEqual(os.path.isdir(settings.ORIGINALS_DIR), True)
|
||||||
@ -414,12 +455,12 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
Path(os.path.join(tmp, "notempty", "file")).touch()
|
Path(os.path.join(tmp, "notempty", "file")).touch()
|
||||||
os.makedirs(os.path.join(tmp, "notempty", "empty"))
|
os.makedirs(os.path.join(tmp, "notempty", "empty"))
|
||||||
|
|
||||||
delete_empty_directories(os.path.join(tmp, "notempty", "empty"), root=settings.ORIGINALS_DIR)
|
delete_empty_directories(
|
||||||
|
os.path.join(tmp, "notempty", "empty"), root=settings.ORIGINALS_DIR
|
||||||
|
)
|
||||||
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
|
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty")), True)
|
||||||
self.assertEqual(os.path.isfile(
|
self.assertEqual(os.path.isfile(os.path.join(tmp, "notempty", "file")), True)
|
||||||
os.path.join(tmp, "notempty", "file")), True)
|
self.assertEqual(os.path.isdir(os.path.join(tmp, "notempty", "empty")), False)
|
||||||
self.assertEqual(os.path.isdir(
|
|
||||||
os.path.join(tmp, "notempty", "empty")), False)
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{created/[title]")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{created/[title]")
|
||||||
def test_invalid_format(self):
|
def test_invalid_format(self):
|
||||||
@ -441,8 +482,12 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||||
def test_duplicates(self):
|
def test_duplicates(self):
|
||||||
document = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="A", pk=1)
|
document = Document.objects.create(
|
||||||
document2 = Document.objects.create(mime_type="application/pdf", title="qwe", checksum="B", pk=2)
|
mime_type="application/pdf", title="qwe", checksum="A", pk=1
|
||||||
|
)
|
||||||
|
document2 = Document.objects.create(
|
||||||
|
mime_type="application/pdf", title="qwe", checksum="B", pk=2
|
||||||
|
)
|
||||||
Path(document.source_path).touch()
|
Path(document.source_path).touch()
|
||||||
Path(document2.source_path).touch()
|
Path(document2.source_path).touch()
|
||||||
document.filename = "0000001.pdf"
|
document.filename = "0000001.pdf"
|
||||||
@ -480,11 +525,17 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
self.assertTrue(os.path.isfile(document.source_path))
|
self.assertTrue(os.path.isfile(document.source_path))
|
||||||
self.assertEqual(document2.filename, "qwe.pdf")
|
self.assertEqual(document2.filename, "qwe.pdf")
|
||||||
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
|
||||||
@mock.patch("documents.signals.handlers.Document.objects.filter")
|
@mock.patch("documents.signals.handlers.Document.objects.filter")
|
||||||
def test_no_update_without_change(self, m):
|
def test_no_update_without_change(self, m):
|
||||||
doc = Document.objects.create(title="document", filename="document.pdf", archive_filename="document.pdf", checksum="A", archive_checksum="B", mime_type="application/pdf")
|
doc = Document.objects.create(
|
||||||
|
title="document",
|
||||||
|
filename="document.pdf",
|
||||||
|
archive_filename="document.pdf",
|
||||||
|
checksum="A",
|
||||||
|
archive_checksum="B",
|
||||||
|
mime_type="application/pdf",
|
||||||
|
)
|
||||||
Path(doc.source_path).touch()
|
Path(doc.source_path).touch()
|
||||||
Path(doc.archive_path).touch()
|
Path(doc.archive_path).touch()
|
||||||
|
|
||||||
@ -493,16 +544,20 @@ class TestFileHandling(DirectoriesMixin, TestCase):
|
|||||||
m.assert_not_called()
|
m.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
|
@override_settings(PAPERLESS_FILENAME_FORMAT=None)
|
||||||
def test_create_no_format(self):
|
def test_create_no_format(self):
|
||||||
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
||||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||||
Path(original).touch()
|
Path(original).touch()
|
||||||
Path(archive).touch()
|
Path(archive).touch()
|
||||||
doc = Document.objects.create(mime_type="application/pdf", filename="0000001.pdf", checksum="A", archive_filename="0000001.pdf", archive_checksum="B")
|
doc = Document.objects.create(
|
||||||
|
mime_type="application/pdf",
|
||||||
|
filename="0000001.pdf",
|
||||||
|
checksum="A",
|
||||||
|
archive_filename="0000001.pdf",
|
||||||
|
archive_checksum="B",
|
||||||
|
)
|
||||||
|
|
||||||
self.assertTrue(os.path.isfile(original))
|
self.assertTrue(os.path.isfile(original))
|
||||||
self.assertTrue(os.path.isfile(archive))
|
self.assertTrue(os.path.isfile(archive))
|
||||||
@ -515,21 +570,39 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
|||||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||||
Path(original).touch()
|
Path(original).touch()
|
||||||
Path(archive).touch()
|
Path(archive).touch()
|
||||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
doc = Document.objects.create(
|
||||||
|
mime_type="application/pdf",
|
||||||
|
title="my_doc",
|
||||||
|
filename="0000001.pdf",
|
||||||
|
checksum="A",
|
||||||
|
archive_checksum="B",
|
||||||
|
archive_filename="0000001.pdf",
|
||||||
|
)
|
||||||
|
|
||||||
self.assertFalse(os.path.isfile(original))
|
self.assertFalse(os.path.isfile(original))
|
||||||
self.assertFalse(os.path.isfile(archive))
|
self.assertFalse(os.path.isfile(archive))
|
||||||
self.assertTrue(os.path.isfile(doc.source_path))
|
self.assertTrue(os.path.isfile(doc.source_path))
|
||||||
self.assertTrue(os.path.isfile(doc.archive_path))
|
self.assertTrue(os.path.isfile(doc.archive_path))
|
||||||
self.assertEqual(doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf"))
|
self.assertEqual(
|
||||||
self.assertEqual(doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf"))
|
doc.source_path, os.path.join(settings.ORIGINALS_DIR, "none", "my_doc.pdf")
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
doc.archive_path, os.path.join(settings.ARCHIVE_DIR, "none", "my_doc.pdf")
|
||||||
|
)
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||||
def test_move_archive_gone(self):
|
def test_move_archive_gone(self):
|
||||||
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
||||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||||
Path(original).touch()
|
Path(original).touch()
|
||||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
doc = Document.objects.create(
|
||||||
|
mime_type="application/pdf",
|
||||||
|
title="my_doc",
|
||||||
|
filename="0000001.pdf",
|
||||||
|
checksum="A",
|
||||||
|
archive_checksum="B",
|
||||||
|
archive_filename="0000001.pdf",
|
||||||
|
)
|
||||||
|
|
||||||
self.assertTrue(os.path.isfile(original))
|
self.assertTrue(os.path.isfile(original))
|
||||||
self.assertFalse(os.path.isfile(archive))
|
self.assertFalse(os.path.isfile(archive))
|
||||||
@ -545,7 +618,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
|||||||
Path(archive).touch()
|
Path(archive).touch()
|
||||||
os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
|
os.makedirs(os.path.join(settings.ARCHIVE_DIR, "none"))
|
||||||
Path(existing_archive_file).touch()
|
Path(existing_archive_file).touch()
|
||||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
doc = Document.objects.create(
|
||||||
|
mime_type="application/pdf",
|
||||||
|
title="my_doc",
|
||||||
|
filename="0000001.pdf",
|
||||||
|
checksum="A",
|
||||||
|
archive_checksum="B",
|
||||||
|
archive_filename="0000001.pdf",
|
||||||
|
)
|
||||||
|
|
||||||
self.assertFalse(os.path.isfile(original))
|
self.assertFalse(os.path.isfile(original))
|
||||||
self.assertFalse(os.path.isfile(archive))
|
self.assertFalse(os.path.isfile(archive))
|
||||||
@ -561,8 +641,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
|||||||
Path(original).touch()
|
Path(original).touch()
|
||||||
Path(archive).touch()
|
Path(archive).touch()
|
||||||
|
|
||||||
doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document_01.pdf", checksum="A",
|
doc = Document.objects.create(
|
||||||
archive_checksum="B", archive_filename="document.pdf")
|
mime_type="application/pdf",
|
||||||
|
title="document",
|
||||||
|
filename="document_01.pdf",
|
||||||
|
checksum="A",
|
||||||
|
archive_checksum="B",
|
||||||
|
archive_filename="document.pdf",
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(doc.filename, "document.pdf")
|
self.assertEqual(doc.filename, "document.pdf")
|
||||||
self.assertEqual(doc.archive_filename, "document.pdf")
|
self.assertEqual(doc.archive_filename, "document.pdf")
|
||||||
@ -577,8 +663,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
|||||||
Path(original).touch()
|
Path(original).touch()
|
||||||
Path(archive).touch()
|
Path(archive).touch()
|
||||||
|
|
||||||
doc = Document.objects.create(mime_type="application/pdf", title="document", filename="document.pdf", checksum="A",
|
doc = Document.objects.create(
|
||||||
archive_checksum="B", archive_filename="document_01.pdf")
|
mime_type="application/pdf",
|
||||||
|
title="document",
|
||||||
|
filename="document.pdf",
|
||||||
|
checksum="A",
|
||||||
|
archive_checksum="B",
|
||||||
|
archive_filename="document_01.pdf",
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(doc.filename, "document.pdf")
|
self.assertEqual(doc.filename, "document.pdf")
|
||||||
self.assertEqual(doc.archive_filename, "document.pdf")
|
self.assertEqual(doc.archive_filename, "document.pdf")
|
||||||
@ -589,7 +681,6 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
|||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||||
@mock.patch("documents.signals.handlers.os.rename")
|
@mock.patch("documents.signals.handlers.os.rename")
|
||||||
def test_move_archive_error(self, m):
|
def test_move_archive_error(self, m):
|
||||||
|
|
||||||
def fake_rename(src, dst):
|
def fake_rename(src, dst):
|
||||||
if "archive" in src:
|
if "archive" in src:
|
||||||
raise OSError()
|
raise OSError()
|
||||||
@ -603,7 +694,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
|||||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||||
Path(original).touch()
|
Path(original).touch()
|
||||||
Path(archive).touch()
|
Path(archive).touch()
|
||||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", checksum="A", archive_checksum="B", archive_filename="0000001.pdf")
|
doc = Document.objects.create(
|
||||||
|
mime_type="application/pdf",
|
||||||
|
title="my_doc",
|
||||||
|
filename="0000001.pdf",
|
||||||
|
checksum="A",
|
||||||
|
archive_checksum="B",
|
||||||
|
archive_filename="0000001.pdf",
|
||||||
|
)
|
||||||
|
|
||||||
m.assert_called()
|
m.assert_called()
|
||||||
self.assertTrue(os.path.isfile(original))
|
self.assertTrue(os.path.isfile(original))
|
||||||
@ -615,9 +713,16 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
|||||||
def test_move_file_gone(self):
|
def test_move_file_gone(self):
|
||||||
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
original = os.path.join(settings.ORIGINALS_DIR, "0000001.pdf")
|
||||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||||
#Path(original).touch()
|
# Path(original).touch()
|
||||||
Path(archive).touch()
|
Path(archive).touch()
|
||||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")
|
doc = Document.objects.create(
|
||||||
|
mime_type="application/pdf",
|
||||||
|
title="my_doc",
|
||||||
|
filename="0000001.pdf",
|
||||||
|
archive_filename="0000001.pdf",
|
||||||
|
checksum="A",
|
||||||
|
archive_checksum="B",
|
||||||
|
)
|
||||||
|
|
||||||
self.assertFalse(os.path.isfile(original))
|
self.assertFalse(os.path.isfile(original))
|
||||||
self.assertTrue(os.path.isfile(archive))
|
self.assertTrue(os.path.isfile(archive))
|
||||||
@ -627,7 +732,6 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
|||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||||
@mock.patch("documents.signals.handlers.os.rename")
|
@mock.patch("documents.signals.handlers.os.rename")
|
||||||
def test_move_file_error(self, m):
|
def test_move_file_error(self, m):
|
||||||
|
|
||||||
def fake_rename(src, dst):
|
def fake_rename(src, dst):
|
||||||
if "original" in src:
|
if "original" in src:
|
||||||
raise OSError()
|
raise OSError()
|
||||||
@ -641,7 +745,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):
|
|||||||
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
|
||||||
Path(original).touch()
|
Path(original).touch()
|
||||||
Path(archive).touch()
|
Path(archive).touch()
|
||||||
doc = Document.objects.create(mime_type="application/pdf", title="my_doc", filename="0000001.pdf", archive_filename="0000001.pdf", checksum="A", archive_checksum="B")
|
doc = Document.objects.create(
|
||||||
|
mime_type="application/pdf",
|
||||||
|
title="my_doc",
|
||||||
|
filename="0000001.pdf",
|
||||||
|
archive_filename="0000001.pdf",
|
||||||
|
checksum="A",
|
||||||
|
archive_checksum="B",
|
||||||
|
)
|
||||||
|
|
||||||
m.assert_called()
|
m.assert_called()
|
||||||
self.assertTrue(os.path.isfile(original))
|
self.assertTrue(os.path.isfile(original))
|
||||||
@@ -655,7 +766,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):

        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
        Path(original).touch()
        Path(archive).touch()
        doc = Document.objects.create(
            mime_type="application/pdf",
            title="my_doc",
            filename="0000001.pdf",
            checksum="A",
            archive_checksum="B",
            archive_filename="0000001.pdf",
        )

        self.assertTrue(os.path.isfile(original))
        self.assertTrue(os.path.isfile(archive))
@@ -678,8 +796,20 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):

        Path(original2).touch()
        Path(archive).touch()

        doc1 = Document.objects.create(
            mime_type="image/png",
            title="document",
            filename="document.png",
            checksum="A",
            archive_checksum="B",
            archive_filename="0000001.pdf",
        )
        doc2 = Document.objects.create(
            mime_type="application/pdf",
            title="0000001",
            filename="0000001.pdf",
            checksum="C",
        )

        self.assertTrue(os.path.isfile(doc1.source_path))
        self.assertTrue(os.path.isfile(doc1.archive_path))
@@ -698,7 +828,14 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):

        archive = os.path.join(settings.ARCHIVE_DIR, "0000001.pdf")
        Path(original).touch()
        Path(archive).touch()
        doc = Document(
            mime_type="application/pdf",
            title="my_doc",
            filename="0000001.pdf",
            checksum="A",
            archive_filename="0000001.pdf",
            archive_checksum="B",
        )
        with mock.patch("documents.signals.handlers.Document.objects.filter") as m:
            m.side_effect = DatabaseError()
            doc.save()
@@ -710,28 +847,38 @@ class TestFileHandlingWithArchive(DirectoriesMixin, TestCase):


class TestFilenameGeneration(TestCase):
    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
    def test_invalid_characters(self):

        doc = Document.objects.create(
            title="This. is the title.", mime_type="application/pdf", pk=1, checksum="1"
        )
        self.assertEqual(generate_filename(doc), "This. is the title.pdf")

        doc = Document.objects.create(
            title="my\\invalid/../title:yay",
            mime_type="application/pdf",
            pk=2,
            checksum="2",
        )
        self.assertEqual(generate_filename(doc), "my-invalid-..-title-yay.pdf")

    @override_settings(PAPERLESS_FILENAME_FORMAT="{created}")
    def test_date(self):
        doc = Document.objects.create(
            title="does not matter",
            created=timezone.make_aware(datetime.datetime(2020, 5, 21, 7, 36, 51, 153)),
            mime_type="application/pdf",
            pk=2,
            checksum="2",
        )
        self.assertEqual(generate_filename(doc), "2020-05-21.pdf")


def run():
    doc = Document.objects.create(
        checksum=str(uuid.uuid4()), title=str(uuid.uuid4()), content="wow"
    )
    doc.filename = generate_unique_filename(doc)
    Path(doc.thumbnail_path).touch()
    with open(doc.source_path, "w") as f:
@@ -6,14 +6,14 @@ from ..management.commands.document_importer import Command


class TestImporter(TestCase):
    def __init__(self, *args, **kwargs):
        TestCase.__init__(self, *args, **kwargs)

    def test_check_manifest_exists(self):
        cmd = Command()
        self.assertRaises(
            CommandError, cmd._check_manifest_exists, "/tmp/manifest.json"
        )

    def test_check_manifest(self):
@@ -23,15 +23,14 @@ class TestImporter(TestCase):

        cmd.manifest = [{"model": "documents.document"}]
        with self.assertRaises(CommandError) as cm:
            cmd._check_manifest()
        self.assertTrue("The manifest file contains a record" in str(cm.exception))

        cmd.manifest = [
            {"model": "documents.document", EXPORTER_FILE_NAME: "noexist.pdf"}
        ]
        # self.assertRaises(CommandError, cmd._check_manifest)
        with self.assertRaises(CommandError) as cm:
            cmd._check_manifest()
        self.assertTrue(
            'The manifest file refers to "noexist.pdf"' in str(cm.exception)
        )
@@ -6,10 +6,11 @@ from documents.tests.utils import DirectoriesMixin


class TestAutoComplete(DirectoriesMixin, TestCase):
    def test_auto_complete(self):

        doc1 = Document.objects.create(
            title="doc1", checksum="A", content="test test2 test3"
        )
        doc2 = Document.objects.create(title="doc2", checksum="B", content="test test2")
        doc3 = Document.objects.create(title="doc3", checksum="C", content="test2")
@@ -19,7 +20,11 @@ class TestAutoComplete(DirectoriesMixin, TestCase):

        ix = index.open_index()

        self.assertListEqual(
            index.autocomplete(ix, "tes"), [b"test3", b"test", b"test2"]
        )
        self.assertListEqual(
            index.autocomplete(ix, "tes", limit=3), [b"test3", b"test", b"test2"]
        )
        self.assertListEqual(index.autocomplete(ix, "tes", limit=1), [b"test3"])
        self.assertListEqual(index.autocomplete(ix, "tes", limit=0), [])
@@ -22,21 +22,29 @@ sample_file = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")


@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
class TestArchiver(DirectoriesMixin, TestCase):
    def make_models(self):
        return Document.objects.create(
            checksum="A",
            title="A",
            content="first document",
            mime_type="application/pdf",
        )

    def test_archiver(self):

        doc = self.make_models()
        shutil.copy(
            sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")
        )

        call_command("document_archiver")

    def test_handle_document(self):

        doc = self.make_models()
        shutil.copy(
            sample_file, os.path.join(self.dirs.originals_dir, f"{doc.id:07}.pdf")
        )

        handle_document(doc.pk)
@@ -66,10 +74,24 @@ class TestArchiver(DirectoriesMixin, TestCase):

    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}")
    def test_naming_priorities(self):
        doc1 = Document.objects.create(
            checksum="A",
            title="document",
            content="first document",
            mime_type="application/pdf",
            filename="document.pdf",
        )
        doc2 = Document.objects.create(
            checksum="B",
            title="document",
            content="second document",
            mime_type="application/pdf",
            filename="document_01.pdf",
        )
        shutil.copy(sample_file, os.path.join(self.dirs.originals_dir, f"document.pdf"))
        shutil.copy(
            sample_file, os.path.join(self.dirs.originals_dir, f"document_01.pdf")
        )

        handle_document(doc2.pk)
        handle_document(doc1.pk)
@@ -82,12 +104,11 @@ class TestArchiver(DirectoriesMixin, TestCase):


class TestDecryptDocuments(TestCase):
    @override_settings(
        ORIGINALS_DIR=os.path.join(os.path.dirname(__file__), "samples", "originals"),
        THUMBNAIL_DIR=os.path.join(os.path.dirname(__file__), "samples", "thumb"),
        PASSPHRASE="test",
        PAPERLESS_FILENAME_FORMAT=None,
    )
    @mock.patch("documents.management.commands.decrypt_documents.input")
    def test_decrypt(self, m):
@@ -99,17 +120,39 @@ class TestDecryptDocuments(TestCase):

        os.makedirs(thumb_dir, exist_ok=True)

        override_settings(
            ORIGINALS_DIR=originals_dir, THUMBNAIL_DIR=thumb_dir, PASSPHRASE="test"
        ).enable()

        doc = Document.objects.create(
            checksum="82186aaa94f0b98697d704b90fd1c072",
            title="wow",
            filename="0000004.pdf.gpg",
            mime_type="application/pdf",
            storage_type=Document.STORAGE_TYPE_GPG,
        )

        shutil.copy(
            os.path.join(
                os.path.dirname(__file__),
                "samples",
                "documents",
                "originals",
                "0000004.pdf.gpg",
            ),
            os.path.join(originals_dir, "0000004.pdf.gpg"),
        )
        shutil.copy(
            os.path.join(
                os.path.dirname(__file__),
                "samples",
                "documents",
                "thumbnails",
                f"0000004.png.gpg",
            ),
            os.path.join(thumb_dir, f"{doc.id:07}.png.gpg"),
        )

        call_command("decrypt_documents")

        doc.refresh_from_db()
@@ -126,7 +169,6 @@ class TestDecryptDocuments(TestCase):


class TestMakeIndex(TestCase):
    @mock.patch("documents.management.commands.document_index.index_reindex")
    def test_reindex(self, m):
        call_command("document_index", "reindex")
@@ -139,7 +181,6 @@ class TestMakeIndex(TestCase):


class TestRenamer(DirectoriesMixin, TestCase):
    @override_settings(PAPERLESS_FILENAME_FORMAT="")
    def test_rename(self):
        doc = Document.objects.create(title="test", mime_type="image/jpeg")
@@ -164,8 +205,9 @@ class TestRenamer(DirectoriesMixin, TestCase):


class TestCreateClassifier(TestCase):
    @mock.patch(
        "documents.management.commands.document_create_classifier.train_classifier"
    )
    def test_create_classifier(self, m):
        call_command("document_create_classifier")
@@ -173,7 +215,6 @@ class TestCreateClassifier(TestCase):


class TestSanityChecker(DirectoriesMixin, TestCase):
    def test_no_issues(self):
        with self.assertLogs() as capture:
            call_command("document_sanity_checker")
@@ -182,7 +223,9 @@ class TestSanityChecker(DirectoriesMixin, TestCase):

        self.assertIn("Sanity checker detected no issues.", capture.output[0])

    def test_errors(self):
        doc = Document.objects.create(
            title="test", content="test", filename="test.pdf", checksum="abc"
        )
        Path(doc.source_path).touch()
        Path(doc.thumbnail_path).touch()
@@ -16,7 +16,6 @@ from documents.tests.utils import DirectoriesMixin


class ConsumerThread(Thread):
    def __init__(self):
        super().__init__()
        self.cmd = document_consumer.Command()
@@ -31,7 +30,7 @@ class ConsumerThread(Thread):

def chunked(size, source):
    for i in range(0, len(source), size):
        yield source[i : i + size]


class ConsumerMixin:
@@ -41,7 +40,9 @@ class ConsumerMixin:

    def setUp(self) -> None:
        super(ConsumerMixin, self).setUp()
        self.t = None
        patcher = mock.patch(
            "documents.management.commands.document_consumer.async_task"
        )
        self.task_mock = patcher.start()
        self.addCleanup(patcher.stop)
@@ -81,13 +82,13 @@ class ConsumerMixin:

        print("Consumed a perfectly valid file.")

    def slow_write_file(self, target, incomplete=False):
        with open(self.sample_file, "rb") as f:
            pdf_bytes = f.read()

        if incomplete:
            pdf_bytes = pdf_bytes[: len(pdf_bytes) - 100]

        with open(target, "wb") as f:
            # this will take 2 seconds, since the file is about 20k.
            print("Start writing file.")
            for b in chunked(1000, pdf_bytes):
@@ -97,7 +98,6 @@ class ConsumerMixin:


class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
    def test_consume_file(self):
        self.t_start()
@@ -195,23 +195,35 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):

    @override_settings(CONSUMPTION_DIR="does_not_exist")
    def test_consumption_directory_invalid(self):

        self.assertRaises(CommandError, call_command, "document_consumer", "--oneshot")

    @override_settings(CONSUMPTION_DIR="")
    def test_consumption_directory_unset(self):

        self.assertRaises(CommandError, call_command, "document_consumer", "--oneshot")

    def test_mac_write(self):
        self.task_mock.side_effect = self.bogus_task

        self.t_start()

        shutil.copy(
            self.sample_file, os.path.join(self.dirs.consumption_dir, ".DS_STORE")
        )
        shutil.copy(
            self.sample_file, os.path.join(self.dirs.consumption_dir, "my_file.pdf")
        )
        shutil.copy(
            self.sample_file, os.path.join(self.dirs.consumption_dir, "._my_file.pdf")
        )
        shutil.copy(
            self.sample_file,
            os.path.join(self.dirs.consumption_dir, "my_second_file.pdf"),
        )
        shutil.copy(
            self.sample_file,
            os.path.join(self.dirs.consumption_dir, "._my_second_file.pdf"),
        )

        sleep(5)
@@ -219,15 +231,20 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):

        self.assertEqual(2, self.task_mock.call_count)

        fnames = [
            os.path.basename(args[1]) for args, _ in self.task_mock.call_args_list
        ]
        self.assertCountEqual(fnames, ["my_file.pdf", "my_second_file.pdf"])

    def test_is_ignored(self):
        test_paths = [
            (os.path.join(self.dirs.consumption_dir, "foo.pdf"), False),
            (os.path.join(self.dirs.consumption_dir, "foo", "bar.pdf"), False),
            (os.path.join(self.dirs.consumption_dir, ".DS_STORE", "foo.pdf"), True),
            (
                os.path.join(self.dirs.consumption_dir, "foo", ".DS_STORE", "bar.pdf"),
                True,
            ),
            (os.path.join(self.dirs.consumption_dir, ".stfolder", "foo.pdf"), True),
            (os.path.join(self.dirs.consumption_dir, "._foo.pdf"), True),
            (os.path.join(self.dirs.consumption_dir, "._foo", "bar.pdf"), False),
@@ -236,10 +253,13 @@ class TestConsumer(DirectoriesMixin, ConsumerMixin, TransactionTestCase):

            self.assertEqual(
                expected_ignored,
                document_consumer._is_ignored(file_path),
                f'_is_ignored("{file_path}") != {expected_ignored}',
            )


@override_settings(
    CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=3, CONSUMER_POLLING_RETRY_COUNT=20
)
class TestConsumerPolling(TestConsumer):
    # just do all the tests with polling
    pass
@@ -251,21 +271,27 @@ class TestConsumerRecursive(TestConsumer):

    pass


@override_settings(
    CONSUMER_RECURSIVE=True,
    CONSUMER_POLLING=1,
    CONSUMER_POLLING_DELAY=3,
    CONSUMER_POLLING_RETRY_COUNT=20,
)
class TestConsumerRecursivePolling(TestConsumer):
    # just do all the tests with polling and recursive
    pass


class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):
    @override_settings(CONSUMER_RECURSIVE=True)
    @override_settings(CONSUMER_SUBDIRS_AS_TAGS=True)
    def test_consume_file_with_path_tags(self):

        tag_names = ("existingTag", "Space Tag")
        # Create a Tag prior to consuming a file using it in path
        tag_ids = [
            Tag.objects.create(name="existingtag").pk,
        ]

        self.t_start()
@@ -292,6 +318,8 @@ class TestConsumerTags(DirectoriesMixin, ConsumerMixin, TransactionTestCase):

        # their order.
        self.assertCountEqual(kwargs["override_tag_ids"], tag_ids)

    @override_settings(
        CONSUMER_POLLING=1, CONSUMER_POLLING_DELAY=1, CONSUMER_POLLING_RETRY_COUNT=20
    )
    def test_consume_file_with_path_tags_polling(self):
        self.test_consume_file_with_path_tags()
@@ -17,15 +17,41 @@ from documents.tests.utils import DirectoriesMixin, paperless_environment


class TestExportImport(DirectoriesMixin, TestCase):
    def setUp(self) -> None:
        self.target = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.target)

        self.d1 = Document.objects.create(
            content="Content",
            checksum="42995833e01aea9b3edee44bbfdd7ce1",
            archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
            title="wow1",
            filename="0000001.pdf",
            mime_type="application/pdf",
            archive_filename="0000001.pdf",
        )
        self.d2 = Document.objects.create(
            content="Content",
            checksum="9c9691e51741c1f4f41a20896af31770",
            title="wow2",
            filename="0000002.pdf",
            mime_type="application/pdf",
        )
        self.d3 = Document.objects.create(
            content="Content",
            checksum="d38d7ed02e988e072caf924e0f3fcb76",
            title="wow2",
            filename="0000003.pdf",
            mime_type="application/pdf",
        )
        self.d4 = Document.objects.create(
            content="Content",
            checksum="82186aaa94f0b98697d704b90fd1c072",
            title="wow_dec",
            filename="0000004.pdf.gpg",
            mime_type="application/pdf",
            storage_type=Document.STORAGE_TYPE_GPG,
        )

        self.t1 = Tag.objects.create(name="t")
        self.dt1 = DocumentType.objects.create(name="dt")
@@ -38,17 +64,21 @@ class TestExportImport(DirectoriesMixin, TestCase):

        super(TestExportImport, self).setUp()

    def _get_document_from_manifest(self, manifest, id):
        f = list(
            filter(
                lambda d: d["model"] == "documents.document" and d["pk"] == id, manifest
            )
        )
        if len(f) == 1:
            return f[0]
        else:
            raise ValueError(f"document with id {id} does not exist in manifest")

    @override_settings(PASSPHRASE="test")
    def _do_export(
        self, use_filename_format=False, compare_checksums=False, delete=False
    ):
        args = ["document_exporter", self.target]
        if use_filename_format:
            args += ["--use-filename-format"]
        if compare_checksums:
@@ -65,39 +95,69 @@ class TestExportImport(DirectoriesMixin, TestCase):

    def test_exporter(self, use_filename_format=False):
        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
        shutil.copytree(
            os.path.join(os.path.dirname(__file__), "samples", "documents"),
            os.path.join(self.dirs.media_dir, "documents"),
        )

        manifest = self._do_export(use_filename_format=use_filename_format)

        self.assertEqual(len(manifest), 8)
        self.assertEqual(
            len(list(filter(lambda e: e["model"] == "documents.document", manifest))), 4
        )

        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))

        self.assertEqual(
            self._get_document_from_manifest(manifest, self.d1.id)["fields"]["title"],
            "wow1",
        )
        self.assertEqual(
            self._get_document_from_manifest(manifest, self.d2.id)["fields"]["title"],
            "wow2",
        )
        self.assertEqual(
            self._get_document_from_manifest(manifest, self.d3.id)["fields"]["title"],
            "wow2",
        )
        self.assertEqual(
            self._get_document_from_manifest(manifest, self.d4.id)["fields"]["title"],
            "wow_dec",
        )

        for element in manifest:
            if element["model"] == "documents.document":
                fname = os.path.join(
                    self.target, element[document_exporter.EXPORTER_FILE_NAME]
                )
                self.assertTrue(os.path.exists(fname))
                self.assertTrue(
                    os.path.exists(
                        os.path.join(
                            self.target,
                            element[document_exporter.EXPORTER_THUMBNAIL_NAME],
                        )
                    )
                )

                with open(fname, "rb") as f:
                    checksum = hashlib.md5(f.read()).hexdigest()
                self.assertEqual(checksum, element["fields"]["checksum"])

                self.assertEqual(
                    element["fields"]["storage_type"], Document.STORAGE_TYPE_UNENCRYPTED
                )

                if document_exporter.EXPORTER_ARCHIVE_NAME in element:
                    fname = os.path.join(
                        self.target, element[document_exporter.EXPORTER_ARCHIVE_NAME]
                    )
                    self.assertTrue(os.path.exists(fname))

                    with open(fname, "rb") as f:
                        checksum = hashlib.md5(f.read()).hexdigest()
                    self.assertEqual(checksum, element["fields"]["archive_checksum"])

        with paperless_environment() as dirs:
            self.assertEqual(Document.objects.count(), 4)
@@ -107,7 +167,7 @@ class TestExportImport(DirectoriesMixin, TestCase):

            Tag.objects.all().delete()
            self.assertEqual(Document.objects.count(), 0)

            call_command("document_importer", self.target)
            self.assertEqual(Document.objects.count(), 4)
            self.assertEqual(Tag.objects.count(), 1)
            self.assertEqual(Correspondent.objects.count(), 1)
@@ -122,21 +182,31 @@ class TestExportImport(DirectoriesMixin, TestCase):

    def test_exporter_with_filename_format(self):
        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
        shutil.copytree(
            os.path.join(os.path.dirname(__file__), "samples", "documents"),
            os.path.join(self.dirs.media_dir, "documents"),
        )

        with override_settings(
            PAPERLESS_FILENAME_FORMAT="{created_year}/{correspondent}/{title}"
        ):
            self.test_exporter(use_filename_format=True)

    def test_update_export_changed_time(self):
        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
        shutil.copytree(
            os.path.join(os.path.dirname(__file__), "samples", "documents"),
            os.path.join(self.dirs.media_dir, "documents"),
        )

        self._do_export()
        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))

        st_mtime_1 = os.stat(os.path.join(self.target, "manifest.json")).st_mtime

        with mock.patch(
            "documents.management.commands.document_exporter.shutil.copy2"
        ) as m:
            self._do_export()
            m.assert_not_called()
@@ -145,7 +215,9 @@ class TestExportImport(DirectoriesMixin, TestCase):

        Path(self.d1.source_path).touch()

        with mock.patch(
            "documents.management.commands.document_exporter.shutil.copy2"
        ) as m:
            self._do_export()
            self.assertEqual(m.call_count, 1)
@@ -157,13 +229,18 @@ class TestExportImport(DirectoriesMixin, TestCase):

    def test_update_export_changed_checksum(self):
        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
        shutil.copytree(
            os.path.join(os.path.dirname(__file__), "samples", "documents"),
            os.path.join(self.dirs.media_dir, "documents"),
        )

        self._do_export()

        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))

        with mock.patch(
            "documents.management.commands.document_exporter.shutil.copy2"
        ) as m:
            self._do_export()
            m.assert_not_called()
@@ -172,7 +249,9 @@ class TestExportImport(DirectoriesMixin, TestCase):

        self.d2.checksum = "asdfasdgf3"
        self.d2.save()

        with mock.patch(
            "documents.management.commands.document_exporter.shutil.copy2"
        ) as m:
            self._do_export(compare_checksums=True)
            self.assertEqual(m.call_count, 1)
@@ -180,28 +259,48 @@ class TestExportImport(DirectoriesMixin, TestCase):

    def test_update_export_deleted_document(self):
        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
        shutil.copytree(
            os.path.join(os.path.dirname(__file__), "samples", "documents"),
            os.path.join(self.dirs.media_dir, "documents"),
        )

        manifest = self._do_export()

        self.assertTrue(len(manifest), 7)
        doc_from_manifest = self._get_document_from_manifest(manifest, self.d3.id)
        self.assertTrue(
            os.path.isfile(
                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
            )
        )
        self.d3.delete()

        manifest = self._do_export()
        self.assertRaises(
            ValueError, self._get_document_from_manifest, manifest, self.d3.id
        )
        self.assertTrue(
            os.path.isfile(
                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
            )
        )

        manifest = self._do_export(delete=True)
        self.assertFalse(
            os.path.isfile(
                os.path.join(self.target, doc_from_manifest[EXPORTER_FILE_NAME])
            )
        )

        self.assertTrue(len(manifest), 6)

    @override_settings(PAPERLESS_FILENAME_FORMAT="{title}/{correspondent}")
    def test_update_export_changed_location(self):
        shutil.rmtree(os.path.join(self.dirs.media_dir, "documents"))
        shutil.copytree(
            os.path.join(os.path.dirname(__file__), "samples", "documents"),
            os.path.join(self.dirs.media_dir, "documents"),
        )

        m = self._do_export(use_filename_format=True)
        self.assertTrue(os.path.isfile(os.path.join(self.target, "wow1", "c.pdf")))
@@ -216,11 +315,18 @@ class TestExportImport(DirectoriesMixin, TestCase):

        self.assertTrue(os.path.isfile(os.path.join(self.target, "new_title", "c.pdf")))
        self.assertTrue(os.path.exists(os.path.join(self.target, "manifest.json")))
        self.assertTrue(os.path.isfile(os.path.join(self.target, "wow2", "none.pdf")))
        self.assertTrue(
            os.path.isfile(os.path.join(self.target, "wow2", "none_01.pdf"))
        )

    def test_export_missing_files(self):

        target = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, target)
        Document.objects.create(
            checksum="AAAAAAAAAAAAAAAAA",
            title="wow",
            filename="0000004.pdf",
            mime_type="application/pdf",
        )
        self.assertRaises(FileNotFoundError, call_command, "document_exporter", target)
@@ -6,44 +6,64 @@ from documents.tests.utils import DirectoriesMixin


class TestRetagger(DirectoriesMixin, TestCase):
    def make_models(self):
        self.d1 = Document.objects.create(
            checksum="A", title="A", content="first document"
        )
        self.d2 = Document.objects.create(
            checksum="B", title="B", content="second document"
        )
        self.d3 = Document.objects.create(
            checksum="C", title="C", content="unrelated document"
        )
        self.d4 = Document.objects.create(
            checksum="D", title="D", content="auto document"
        )

        self.tag_first = Tag.objects.create(
            name="tag1", match="first", matching_algorithm=Tag.MATCH_ANY
        )
        self.tag_second = Tag.objects.create(
            name="tag2", match="second", matching_algorithm=Tag.MATCH_ANY
        )
        self.tag_inbox = Tag.objects.create(name="test", is_inbox_tag=True)
        self.tag_no_match = Tag.objects.create(name="test2")
        self.tag_auto = Tag.objects.create(
            name="tagauto", matching_algorithm=Tag.MATCH_AUTO
        )

        self.d3.tags.add(self.tag_inbox)
        self.d3.tags.add(self.tag_no_match)
        self.d4.tags.add(self.tag_auto)

        self.correspondent_first = Correspondent.objects.create(
            name="c1", match="first", matching_algorithm=Correspondent.MATCH_ANY
        )
        self.correspondent_second = Correspondent.objects.create(
            name="c2", match="second", matching_algorithm=Correspondent.MATCH_ANY
        )

        self.doctype_first = DocumentType.objects.create(
            name="dt1", match="first", matching_algorithm=DocumentType.MATCH_ANY
        )
        self.doctype_second = DocumentType.objects.create(
            name="dt2", match="second", matching_algorithm=DocumentType.MATCH_ANY
        )

    def get_updated_docs(self):
        return (
            Document.objects.get(title="A"),
            Document.objects.get(title="B"),
            Document.objects.get(title="C"),
            Document.objects.get(title="D"),
        )

    def setUp(self) -> None:
        super(TestRetagger, self).setUp()
        self.make_models()

    def test_add_tags(self):
        call_command("document_retagger", "--tags")
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.tags.count(), 1)
@@ -55,14 +75,14 @@ class TestRetagger(DirectoriesMixin, TestCase):

        self.assertEqual(d_second.tags.first(), self.tag_second)

    def test_add_type(self):
        call_command("document_retagger", "--document_type")
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.document_type, self.doctype_first)
        self.assertEqual(d_second.document_type, self.doctype_second)

    def test_add_correspondent(self):
        call_command("document_retagger", "--correspondent")
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.correspondent, self.correspondent_first)
@@ -71,19 +91,26 @@ class TestRetagger(DirectoriesMixin, TestCase):

    def test_overwrite_preserve_inbox(self):
        self.d1.tags.add(self.tag_second)

        call_command("document_retagger", "--tags", "--overwrite")

        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertIsNotNone(Tag.objects.get(id=self.tag_second.id))

        self.assertCountEqual(
            [tag.id for tag in d_first.tags.all()], [self.tag_first.id]
        )
        self.assertCountEqual(
            [tag.id for tag in d_second.tags.all()], [self.tag_second.id]
        )
        self.assertCountEqual(
            [tag.id for tag in d_unrelated.tags.all()],
            [self.tag_inbox.id, self.tag_no_match.id],
        )
        self.assertEqual(d_auto.tags.count(), 0)

    def test_add_tags_suggest(self):
        call_command("document_retagger", "--tags", "--suggest")
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.tags.count(), 0)
@@ -91,21 +118,23 @@ class TestRetagger(DirectoriesMixin, TestCase):

        self.assertEqual(d_auto.tags.count(), 1)

    def test_add_type_suggest(self):
        call_command("document_retagger", "--document_type", "--suggest")
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.document_type, None)
        self.assertEqual(d_second.document_type, None)

    def test_add_correspondent_suggest(self):
        call_command("document_retagger", "--correspondent", "--suggest")
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.correspondent, None)
        self.assertEqual(d_second.correspondent, None)

    def test_add_tags_suggest_url(self):
        call_command(
            "document_retagger", "--tags", "--suggest", "--base-url=http://localhost"
        )
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.tags.count(), 0)
@@ -113,14 +142,24 @@ class TestRetagger(DirectoriesMixin, TestCase):

        self.assertEqual(d_auto.tags.count(), 1)

    def test_add_type_suggest_url(self):
        call_command(
            "document_retagger",
            "--document_type",
            "--suggest",
            "--base-url=http://localhost",
        )
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.document_type, None)
        self.assertEqual(d_second.document_type, None)

    def test_add_correspondent_suggest_url(self):
        call_command(
            "document_retagger",
            "--correspondent",
            "--suggest",
            "--base-url=http://localhost",
        )
        d_first, d_second, d_unrelated, d_auto = self.get_updated_docs()

        self.assertEqual(d_first.correspondent, None)
@@ -12,7 +12,6 @@ from documents.tests.utils import DirectoriesMixin


class TestManageSuperUser(DirectoriesMixin, TestCase):
-
def reset_environment(self):
if "PAPERLESS_ADMIN_USER" in os.environ:
del os.environ["PAPERLESS_ADMIN_USER"]
@@ -11,13 +11,30 @@ from documents.tests.utils import DirectoriesMixin


class TestMakeThumbnails(DirectoriesMixin, TestCase):
-
def make_models(self):
-self.d1 = Document.objects.create(checksum="A", title="A", content="first document", mime_type="application/pdf", filename="test.pdf")
-shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), self.d1.source_path)
+self.d1 = Document.objects.create(
+checksum="A",
+title="A",
+content="first document",
+mime_type="application/pdf",
+filename="test.pdf",
+)
+shutil.copy(
+os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+self.d1.source_path,
+)

-self.d2 = Document.objects.create(checksum="Ass", title="A", content="first document", mime_type="application/pdf", filename="test2.pdf")
-shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), self.d2.source_path)
+self.d2 = Document.objects.create(
+checksum="Ass",
+title="A",
+content="first document",
+mime_type="application/pdf",
+filename="test2.pdf",
+)
+shutil.copy(
+os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
+self.d2.source_path,
+)

def setUp(self) -> None:
super(TestMakeThumbnails, self).setUp()
@@ -40,13 +57,13 @@ class TestMakeThumbnails(DirectoriesMixin, TestCase):
def test_command(self):
self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
-call_command('document_thumbnails')
+call_command("document_thumbnails")
self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
self.assertTrue(os.path.isfile(self.d2.thumbnail_path))

def test_command_documentid(self):
self.assertFalse(os.path.isfile(self.d1.thumbnail_path))
self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
-call_command('document_thumbnails', '-d', f"{self.d1.id}")
+call_command("document_thumbnails", "-d", f"{self.d1.id}")
self.assertTrue(os.path.isfile(self.d1.thumbnail_path))
self.assertFalse(os.path.isfile(self.d2.thumbnail_path))
@@ -12,25 +12,24 @@ from ..signals import document_consumption_finished


class TestMatching(TestCase):
-
def _test_matching(self, text, algorithm, true, false):
for klass in (Tag, Correspondent, DocumentType):
instance = klass.objects.create(
name=str(randint(10000, 99999)),
match=text,
-matching_algorithm=getattr(klass, algorithm)
+matching_algorithm=getattr(klass, algorithm),
)
for string in true:
doc = Document(content=string)
self.assertTrue(
matching.matches(instance, doc),
-'"%s" should match "%s" but it does not' % (text, string)
+'"%s" should match "%s" but it does not' % (text, string),
)
for string in false:
doc = Document(content=string)
self.assertFalse(
matching.matches(instance, doc),
-'"%s" should not match "%s" but it does' % (text, string)
+'"%s" should not match "%s" but it does' % (text, string),
)

def test_match_all(self):
@@ -47,15 +46,13 @@ class TestMatching(TestCase):
"I have alphas, charlie, and gamma in me",
"I have alphas in me",
"I have bravo in me",
-)
+),
)

self._test_matching(
"12 34 56",
"MATCH_ALL",
-(
-"I have 12 34, and 56 in me",
-),
+("I have 12 34, and 56 in me",),
(
"I have 12 in me",
"I have 34 in me",
@@ -64,7 +61,7 @@ class TestMatching(TestCase):
"I have 120, 34, and 56 in me",
"I have 123456 in me",
"I have 01234567 in me",
-)
+),
)

self._test_matching(
@@ -79,7 +76,7 @@ class TestMatching(TestCase):
"the quick brown wolf jumped over the lazy dogs",
"the quick brown fox jumped over the fat dogs",
"the quick brown fox jumped over the lazy... dogs",
-)
+),
)

def test_match_any(self):
@@ -97,7 +94,7 @@ class TestMatching(TestCase):
(
"I have alphas in me",
"I have bravo in me",
-)
+),
)

self._test_matching(
@@ -114,7 +111,7 @@ class TestMatching(TestCase):
(
"I have 123456 in me",
"I have 01234567 in me",
-)
+),
)

self._test_matching(
@@ -124,9 +121,7 @@ class TestMatching(TestCase):
"the quick brown fox",
"jumped over the lazy dogs.",
),
-(
-"the lazy fox jumped over the brown dogs",
-)
+("the lazy fox jumped over the brown dogs",),
)

def test_match_literal(self):
@@ -134,9 +129,7 @@ class TestMatching(TestCase):
self._test_matching(
"alpha charlie gamma",
"MATCH_LITERAL",
-(
-"I have 'alpha charlie gamma' in me",
-),
+("I have 'alpha charlie gamma' in me",),
(
"I have alpha in me",
"I have charlie in me",
@@ -146,15 +139,13 @@ class TestMatching(TestCase):
"I have alphas, charlie, and gamma in me",
"I have alphas in me",
"I have bravo in me",
-)
+),
)

self._test_matching(
"12 34 56",
"MATCH_LITERAL",
-(
-"I have 12 34 56 in me",
-),
+("I have 12 34 56 in me",),
(
"I have 12 in me",
"I have 34 in me",
@@ -165,7 +156,7 @@ class TestMatching(TestCase):
"I have 120, 340, and 560 in me",
"I have 123456 in me",
"I have 01234567 in me",
-)
+),
)

def test_match_regex(self):
@@ -186,18 +177,11 @@ class TestMatching(TestCase):
"I have alpha, charlie, and gamma in me",
"I have alphas, charlie, and gamma in me",
"I have alphas in me",
-)
+),
)

def test_tach_invalid_regex(self):
-self._test_matching(
-"[[",
-"MATCH_REGEX",
-[],
-[
-"Don't match this"
-]
-)
+self._test_matching("[[", "MATCH_REGEX", [], ["Don't match this"])

def test_match_fuzzy(self):

@@ -210,9 +194,7 @@ class TestMatching(TestCase):
"1220 Main Street, Springfeld, Miss.",
"1220 Main Street Springfield Miss",
),
-(
-"1220 Main Street, Springfield, Mich.",
-)
+("1220 Main Street, Springfield, Mich.",),
)


@@ -225,9 +207,10 @@ class TestDocumentConsumptionFinishedSignal(TestCase):

def setUp(self):
TestCase.setUp(self)
-User.objects.create_user(username='test_consumer', password='12345')
+User.objects.create_user(username="test_consumer", password="12345")
self.doc_contains = Document.objects.create(
-content="I contain the keyword.", mime_type="application/pdf")
+content="I contain the keyword.", mime_type="application/pdf"
+)

self.index_dir = tempfile.mkdtemp()
# TODO: we should not need the index here.
@@ -238,40 +221,43 @@ class TestDocumentConsumptionFinishedSignal(TestCase):

def test_tag_applied_any(self):
t1 = Tag.objects.create(
-name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY)
+name="test", match="keyword", matching_algorithm=Tag.MATCH_ANY
+)
document_consumption_finished.send(
-sender=self.__class__, document=self.doc_contains)
+sender=self.__class__, document=self.doc_contains
+)
self.assertTrue(list(self.doc_contains.tags.all()) == [t1])

def test_tag_not_applied(self):
Tag.objects.create(
-name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY)
+name="test", match="no-match", matching_algorithm=Tag.MATCH_ANY
+)
document_consumption_finished.send(
-sender=self.__class__, document=self.doc_contains)
+sender=self.__class__, document=self.doc_contains
+)
self.assertTrue(list(self.doc_contains.tags.all()) == [])

def test_correspondent_applied(self):
correspondent = Correspondent.objects.create(
-name="test",
-match="keyword",
-matching_algorithm=Correspondent.MATCH_ANY
+name="test", match="keyword", matching_algorithm=Correspondent.MATCH_ANY
)
document_consumption_finished.send(
-sender=self.__class__, document=self.doc_contains)
+sender=self.__class__, document=self.doc_contains
+)
self.assertTrue(self.doc_contains.correspondent == correspondent)

def test_correspondent_not_applied(self):
Tag.objects.create(
-name="test",
-match="no-match",
-matching_algorithm=Correspondent.MATCH_ANY
+name="test", match="no-match", matching_algorithm=Correspondent.MATCH_ANY
)
document_consumption_finished.send(
-sender=self.__class__, document=self.doc_contains)
+sender=self.__class__, document=self.doc_contains
+)
self.assertEqual(self.doc_contains.correspondent, None)

def test_logentry_created(self):
document_consumption_finished.send(
-sender=self.__class__, document=self.doc_contains)
+sender=self.__class__, document=self.doc_contains
+)

self.assertEqual(LogEntry.objects.count(), 1)
@@ -24,18 +24,12 @@ def archive_path_old(self):
else:
fname = "{:07}.pdf".format(self.pk)

-return os.path.join(
-settings.ARCHIVE_DIR,
-fname
-)
+return os.path.join(settings.ARCHIVE_DIR, fname)


def archive_path_new(doc):
if doc.archive_filename is not None:
-return os.path.join(
-settings.ARCHIVE_DIR,
-str(doc.archive_filename)
-)
+return os.path.join(settings.ARCHIVE_DIR, str(doc.archive_filename))
else:
return None

@@ -48,10 +42,7 @@ def source_path(doc):
if doc.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg"  # pragma: no cover

-return os.path.join(
-settings.ORIGINALS_DIR,
-fname
-)
+return os.path.join(settings.ORIGINALS_DIR, fname)


def thumbnail_path(doc):
@ -59,13 +50,18 @@ def thumbnail_path(doc):
|
|||||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||||
file_name += ".gpg"
|
file_name += ".gpg"
|
||||||
|
|
||||||
return os.path.join(
|
return os.path.join(settings.THUMBNAIL_DIR, file_name)
|
||||||
settings.THUMBNAIL_DIR,
|
|
||||||
file_name
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def make_test_document(document_class, title: str, mime_type: str, original: str, original_filename: str, archive: str = None, archive_filename: str = None):
|
def make_test_document(
|
||||||
|
document_class,
|
||||||
|
title: str,
|
||||||
|
mime_type: str,
|
||||||
|
original: str,
|
||||||
|
original_filename: str,
|
||||||
|
archive: str = None,
|
||||||
|
archive_filename: str = None,
|
||||||
|
):
|
||||||
doc = document_class()
|
doc = document_class()
|
||||||
doc.filename = original_filename
|
doc.filename = original_filename
|
||||||
doc.title = title
|
doc.title = title
|
||||||
@ -96,8 +92,12 @@ def make_test_document(document_class, title: str, mime_type: str, original: str
|
|||||||
|
|
||||||
simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg")
|
simple_jpg = os.path.join(os.path.dirname(__file__), "samples", "simple.jpg")
|
||||||
simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
simple_pdf = os.path.join(os.path.dirname(__file__), "samples", "simple.pdf")
|
||||||
simple_pdf2 = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf")
|
simple_pdf2 = os.path.join(
|
||||||
simple_pdf3 = os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf")
|
os.path.dirname(__file__), "samples", "documents", "originals", "0000002.pdf"
|
||||||
|
)
|
||||||
|
simple_pdf3 = os.path.join(
|
||||||
|
os.path.dirname(__file__), "samples", "documents", "originals", "0000003.pdf"
|
||||||
|
)
|
||||||
simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt")
|
simple_txt = os.path.join(os.path.dirname(__file__), "samples", "simple.txt")
|
||||||
simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png")
|
simple_png = os.path.join(os.path.dirname(__file__), "samples", "simple-noalpha.png")
|
||||||
simple_png2 = os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
|
simple_png2 = os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
|
||||||
@ -106,26 +106,52 @@ simple_png2 = os.path.join(os.path.dirname(__file__), "examples", "no-text.png")
|
|||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||||
class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
|
class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
|
||||||
|
|
||||||
migrate_from = '1011_auto_20210101_2340'
|
migrate_from = "1011_auto_20210101_2340"
|
||||||
migrate_to = '1012_fix_archive_files'
|
migrate_to = "1012_fix_archive_files"
|
||||||
|
|
||||||
def setUpBeforeMigration(self, apps):
|
def setUpBeforeMigration(self, apps):
|
||||||
Document = apps.get_model("documents", "Document")
|
Document = apps.get_model("documents", "Document")
|
||||||
|
|
||||||
self.unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf3, "unrelated.pdf", simple_pdf)
|
self.unrelated = make_test_document(
|
||||||
self.no_text = make_test_document(Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf)
|
Document,
|
||||||
self.doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
|
"unrelated",
|
||||||
self.clash1 = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
|
"application/pdf",
|
||||||
self.clash2 = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf)
|
simple_pdf3,
|
||||||
self.clash3 = make_test_document(Document, "clash", "image/png", simple_png, "clash.png", simple_pdf)
|
"unrelated.pdf",
|
||||||
self.clash4 = make_test_document(Document, "clash.png", "application/pdf", simple_pdf2, "clash.png.pdf", simple_pdf2)
|
simple_pdf,
|
||||||
|
)
|
||||||
|
self.no_text = make_test_document(
|
||||||
|
Document, "no-text", "image/png", simple_png2, "no-text.png", simple_pdf
|
||||||
|
)
|
||||||
|
self.doc_no_archive = make_test_document(
|
||||||
|
Document, "no_archive", "text/plain", simple_txt, "no_archive.txt"
|
||||||
|
)
|
||||||
|
self.clash1 = make_test_document(
|
||||||
|
Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf
|
||||||
|
)
|
||||||
|
self.clash2 = make_test_document(
|
||||||
|
Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf
|
||||||
|
)
|
||||||
|
self.clash3 = make_test_document(
|
||||||
|
Document, "clash", "image/png", simple_png, "clash.png", simple_pdf
|
||||||
|
)
|
||||||
|
self.clash4 = make_test_document(
|
||||||
|
Document,
|
||||||
|
"clash.png",
|
||||||
|
"application/pdf",
|
||||||
|
simple_pdf2,
|
||||||
|
"clash.png.pdf",
|
||||||
|
simple_pdf2,
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2))
|
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash2))
|
||||||
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3))
|
self.assertEqual(archive_path_old(self.clash1), archive_path_old(self.clash3))
|
||||||
self.assertNotEqual(archive_path_old(self.clash1), archive_path_old(self.clash4))
|
self.assertNotEqual(
|
||||||
|
archive_path_old(self.clash1), archive_path_old(self.clash4)
|
||||||
|
)
|
||||||
|
|
||||||
def testArchiveFilesMigrated(self):
|
def testArchiveFilesMigrated(self):
|
||||||
Document = self.apps.get_model('documents', 'Document')
|
Document = self.apps.get_model("documents", "Document")
|
||||||
|
|
||||||
for doc in Document.objects.all():
|
for doc in Document.objects.all():
|
||||||
if doc.archive_checksum:
|
if doc.archive_checksum:
|
||||||
@ -144,31 +170,65 @@ class TestMigrateArchiveFiles(DirectoriesMixin, TestMigrations):
|
|||||||
archive_checksum = hashlib.md5(f.read()).hexdigest()
|
archive_checksum = hashlib.md5(f.read()).hexdigest()
|
||||||
self.assertEqual(archive_checksum, doc.archive_checksum)
|
self.assertEqual(archive_checksum, doc.archive_checksum)
|
||||||
|
|
||||||
self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 6)
|
self.assertEqual(
|
||||||
|
Document.objects.filter(archive_checksum__isnull=False).count(), 6
|
||||||
|
)
|
||||||
|
|
||||||
def test_filenames(self):
|
def test_filenames(self):
|
||||||
Document = self.apps.get_model('documents', 'Document')
|
Document = self.apps.get_model("documents", "Document")
|
||||||
self.assertEqual(Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf")
|
self.assertEqual(
|
||||||
self.assertEqual(Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf")
|
Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf"
|
||||||
self.assertEqual(Document.objects.get(id=self.doc_no_archive.id).archive_filename, None)
|
)
|
||||||
self.assertEqual(Document.objects.get(id=self.clash1.id).archive_filename, f"{self.clash1.id:07}.pdf")
|
self.assertEqual(
|
||||||
self.assertEqual(Document.objects.get(id=self.clash2.id).archive_filename, f"{self.clash2.id:07}.pdf")
|
Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf"
|
||||||
self.assertEqual(Document.objects.get(id=self.clash3.id).archive_filename, f"{self.clash3.id:07}.pdf")
|
)
|
||||||
self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
|
self.assertEqual(
|
||||||
|
Document.objects.get(id=self.doc_no_archive.id).archive_filename, None
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
Document.objects.get(id=self.clash1.id).archive_filename,
|
||||||
|
f"{self.clash1.id:07}.pdf",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
Document.objects.get(id=self.clash2.id).archive_filename,
|
||||||
|
f"{self.clash2.id:07}.pdf",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
Document.objects.get(id=self.clash3.id).archive_filename,
|
||||||
|
f"{self.clash3.id:07}.pdf",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||||
class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
|
class TestMigrateArchiveFilesWithFilenameFormat(TestMigrateArchiveFiles):
|
||||||
|
|
||||||
def test_filenames(self):
|
def test_filenames(self):
|
||||||
Document = self.apps.get_model('documents', 'Document')
|
Document = self.apps.get_model("documents", "Document")
|
||||||
self.assertEqual(Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf")
|
self.assertEqual(
|
||||||
self.assertEqual(Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf")
|
Document.objects.get(id=self.unrelated.id).archive_filename, "unrelated.pdf"
|
||||||
self.assertEqual(Document.objects.get(id=self.doc_no_archive.id).archive_filename, None)
|
)
|
||||||
self.assertEqual(Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf")
|
self.assertEqual(
|
||||||
self.assertEqual(Document.objects.get(id=self.clash2.id).archive_filename, "none/clash_01.pdf")
|
Document.objects.get(id=self.no_text.id).archive_filename, "no-text.pdf"
|
||||||
self.assertEqual(Document.objects.get(id=self.clash3.id).archive_filename, "none/clash_02.pdf")
|
)
|
||||||
self.assertEqual(Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf")
|
self.assertEqual(
|
||||||
|
Document.objects.get(id=self.doc_no_archive.id).archive_filename, None
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
Document.objects.get(id=self.clash1.id).archive_filename, "none/clash.pdf"
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
Document.objects.get(id=self.clash2.id).archive_filename,
|
||||||
|
"none/clash_01.pdf",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
Document.objects.get(id=self.clash3.id).archive_filename,
|
||||||
|
"none/clash_02.pdf",
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
Document.objects.get(id=self.clash4.id).archive_filename, "clash.png.pdf"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def fake_parse_wrapper(parser, path, mime_type, file_name):
|
def fake_parse_wrapper(parser, path, mime_type, file_name):
|
||||||
@ -179,34 +239,63 @@ def fake_parse_wrapper(parser, path, mime_type, file_name):
|
|||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||||
class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
|
class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
|
||||||
|
|
||||||
migrate_from = '1011_auto_20210101_2340'
|
migrate_from = "1011_auto_20210101_2340"
|
||||||
migrate_to = '1012_fix_archive_files'
|
migrate_to = "1012_fix_archive_files"
|
||||||
auto_migrate = False
|
auto_migrate = False
|
||||||
|
|
||||||
def test_archive_missing(self):
|
def test_archive_missing(self):
|
||||||
|
|
||||||
Document = self.apps.get_model("documents", "Document")
|
Document = self.apps.get_model("documents", "Document")
|
||||||
|
|
||||||
doc = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf)
|
doc = make_test_document(
|
||||||
|
Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf
|
||||||
|
)
|
||||||
os.unlink(archive_path_old(doc))
|
os.unlink(archive_path_old(doc))
|
||||||
|
|
||||||
self.assertRaisesMessage(ValueError, "does not exist at: ", self.performMigration)
|
self.assertRaisesMessage(
|
||||||
|
ValueError, "does not exist at: ", self.performMigration
|
||||||
|
)
|
||||||
|
|
||||||
def test_parser_missing(self):
|
def test_parser_missing(self):
|
||||||
Document = self.apps.get_model("documents", "Document")
|
Document = self.apps.get_model("documents", "Document")
|
||||||
|
|
||||||
doc1 = make_test_document(Document, "document", "invalid/typesss768", simple_png, "document.png", simple_pdf)
|
doc1 = make_test_document(
|
||||||
doc2 = make_test_document(Document, "document", "invalid/typesss768", simple_jpg, "document.jpg", simple_pdf)
|
Document,
|
||||||
|
"document",
|
||||||
|
"invalid/typesss768",
|
||||||
|
simple_png,
|
||||||
|
"document.png",
|
||||||
|
simple_pdf,
|
||||||
|
)
|
||||||
|
doc2 = make_test_document(
|
||||||
|
Document,
|
||||||
|
"document",
|
||||||
|
"invalid/typesss768",
|
||||||
|
simple_jpg,
|
||||||
|
"document.jpg",
|
||||||
|
simple_pdf,
|
||||||
|
)
|
||||||
|
|
||||||
self.assertRaisesMessage(ValueError, "no parsers are available", self.performMigration)
|
self.assertRaisesMessage(
|
||||||
|
ValueError, "no parsers are available", self.performMigration
|
||||||
|
)
|
||||||
|
|
||||||
@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
|
@mock.patch("documents.migrations.1012_fix_archive_files.parse_wrapper")
|
||||||
def test_parser_error(self, m):
|
def test_parser_error(self, m):
|
||||||
m.side_effect = ParseError()
|
m.side_effect = ParseError()
|
||||||
Document = self.apps.get_model("documents", "Document")
|
Document = self.apps.get_model("documents", "Document")
|
||||||
|
|
||||||
doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
|
doc1 = make_test_document(
|
||||||
doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
|
Document, "document", "image/png", simple_png, "document.png", simple_pdf
|
||||||
|
)
|
||||||
|
doc2 = make_test_document(
|
||||||
|
Document,
|
||||||
|
"document",
|
||||||
|
"application/pdf",
|
||||||
|
simple_jpg,
|
||||||
|
"document.jpg",
|
||||||
|
simple_pdf,
|
||||||
|
)
|
||||||
|
|
||||||
self.assertIsNotNone(doc1.archive_checksum)
|
self.assertIsNotNone(doc1.archive_checksum)
|
||||||
self.assertIsNotNone(doc2.archive_checksum)
|
self.assertIsNotNone(doc2.archive_checksum)
|
||||||
@ -217,12 +306,29 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
|
|||||||
self.assertEqual(m.call_count, 6)
|
self.assertEqual(m.call_count, 6)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
len(list(filter(lambda log: "Parse error, will try again in 5 seconds" in log, capture.output))),
|
len(
|
||||||
4)
|
list(
|
||||||
|
filter(
|
||||||
|
lambda log: "Parse error, will try again in 5 seconds" in log,
|
||||||
|
capture.output,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
4,
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
len(list(filter(lambda log: "Unable to regenerate archive document for ID:" in log, capture.output))),
|
len(
|
||||||
2)
|
list(
|
||||||
|
filter(
|
||||||
|
lambda log: "Unable to regenerate archive document for ID:"
|
||||||
|
in log,
|
||||||
|
capture.output,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
2,
|
||||||
|
)
|
||||||
|
|
||||||
Document = self.apps.get_model("documents", "Document")
|
Document = self.apps.get_model("documents", "Document")
|
||||||
|
|
||||||
@ -240,15 +346,33 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
|
|||||||
|
|
||||||
Document = self.apps.get_model("documents", "Document")
|
Document = self.apps.get_model("documents", "Document")
|
||||||
|
|
||||||
doc1 = make_test_document(Document, "document", "image/png", simple_png, "document.png", simple_pdf)
|
doc1 = make_test_document(
|
||||||
doc2 = make_test_document(Document, "document", "application/pdf", simple_jpg, "document.jpg", simple_pdf)
|
Document, "document", "image/png", simple_png, "document.png", simple_pdf
|
||||||
|
)
|
||||||
|
doc2 = make_test_document(
|
||||||
|
Document,
|
||||||
|
"document",
|
||||||
|
"application/pdf",
|
||||||
|
simple_jpg,
|
||||||
|
"document.jpg",
|
||||||
|
simple_pdf,
|
||||||
|
)
|
||||||
|
|
||||||
with self.assertLogs() as capture:
|
with self.assertLogs() as capture:
|
||||||
self.performMigration()
|
self.performMigration()
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
len(list(filter(lambda log: "Parser did not return an archive document for document" in log, capture.output))),
|
len(
|
||||||
2)
|
list(
|
||||||
|
filter(
|
||||||
|
lambda log: "Parser did not return an archive document for document"
|
||||||
|
in log,
|
||||||
|
capture.output,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
),
|
||||||
|
2,
|
||||||
|
)
|
||||||
|
|
||||||
Document = self.apps.get_model("documents", "Document")
|
Document = self.apps.get_model("documents", "Document")
|
||||||
|
|
||||||
@ -264,19 +388,37 @@ class TestMigrateArchiveFilesErrors(DirectoriesMixin, TestMigrations):
|
|||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||||
class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
|
class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
|
||||||
|
|
||||||
migrate_from = '1012_fix_archive_files'
|
migrate_from = "1012_fix_archive_files"
|
||||||
migrate_to = '1011_auto_20210101_2340'
|
migrate_to = "1011_auto_20210101_2340"
|
||||||
|
|
||||||
def setUpBeforeMigration(self, apps):
|
def setUpBeforeMigration(self, apps):
|
||||||
|
|
||||||
Document = apps.get_model("documents", "Document")
|
Document = apps.get_model("documents", "Document")
|
||||||
|
|
||||||
doc_unrelated = make_test_document(Document, "unrelated", "application/pdf", simple_pdf2, "unrelated.txt", simple_pdf2, "unrelated.pdf")
|
doc_unrelated = make_test_document(
|
||||||
doc_no_archive = make_test_document(Document, "no_archive", "text/plain", simple_txt, "no_archive.txt")
|
Document,
|
||||||
clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_02.pdf")
|
"unrelated",
|
||||||
|
"application/pdf",
|
||||||
|
simple_pdf2,
|
||||||
|
"unrelated.txt",
|
||||||
|
simple_pdf2,
|
||||||
|
"unrelated.pdf",
|
||||||
|
)
|
||||||
|
doc_no_archive = make_test_document(
|
||||||
|
Document, "no_archive", "text/plain", simple_txt, "no_archive.txt"
|
||||||
|
)
|
||||||
|
clashB = make_test_document(
|
||||||
|
Document,
|
||||||
|
"clash",
|
||||||
|
"image/jpeg",
|
||||||
|
simple_jpg,
|
||||||
|
"clash.jpg",
|
||||||
|
simple_pdf,
|
||||||
|
"clash_02.pdf",
|
||||||
|
)
|
||||||
|
|
||||||
def testArchiveFilesReverted(self):
|
def testArchiveFilesReverted(self):
|
||||||
Document = self.apps.get_model('documents', 'Document')
|
Document = self.apps.get_model("documents", "Document")
|
||||||
|
|
||||||
for doc in Document.objects.all():
|
for doc in Document.objects.all():
|
||||||
if doc.archive_checksum:
|
if doc.archive_checksum:
|
||||||
@ -291,35 +433,77 @@ class TestMigrateArchiveFilesBackwards(DirectoriesMixin, TestMigrations):
|
|||||||
archive_checksum = hashlib.md5(f.read()).hexdigest()
|
archive_checksum = hashlib.md5(f.read()).hexdigest()
|
||||||
self.assertEqual(archive_checksum, doc.archive_checksum)
|
self.assertEqual(archive_checksum, doc.archive_checksum)
|
||||||
|
|
||||||
self.assertEqual(Document.objects.filter(archive_checksum__isnull=False).count(), 2)
|
self.assertEqual(
|
||||||
|
Document.objects.filter(archive_checksum__isnull=False).count(), 2
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
|
||||||
class TestMigrateArchiveFilesBackwardsWithFilenameFormat(TestMigrateArchiveFilesBackwards):
|
class TestMigrateArchiveFilesBackwardsWithFilenameFormat(
|
||||||
|
TestMigrateArchiveFilesBackwards
|
||||||
|
):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
@override_settings(PAPERLESS_FILENAME_FORMAT="")
|
||||||
class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
|
class TestMigrateArchiveFilesBackwardsErrors(DirectoriesMixin, TestMigrations):
|
||||||
|
|
||||||
migrate_from = '1012_fix_archive_files'
|
migrate_from = "1012_fix_archive_files"
|
||||||
migrate_to = '1011_auto_20210101_2340'
|
migrate_to = "1011_auto_20210101_2340"
|
||||||
auto_migrate = False
|
auto_migrate = False
|
||||||
|
|
||||||
def test_filename_clash(self):
|
def test_filename_clash(self):
|
||||||
|
|
||||||
Document = self.apps.get_model("documents", "Document")
|
Document = self.apps.get_model("documents", "Document")
|
||||||
|
|
||||||
self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash_02.pdf")
|
self.clashA = make_test_document(
|
||||||
self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")
|
Document,
|
||||||
|
"clash",
|
||||||
|
"application/pdf",
|
||||||
|
simple_pdf,
|
||||||
|
"clash.pdf",
|
||||||
|
simple_pdf,
|
||||||
|
"clash_02.pdf",
|
||||||
|
)
|
||||||
|
self.clashB = make_test_document(
|
||||||
|
Document,
|
||||||
|
"clash",
|
||||||
|
"image/jpeg",
|
||||||
|
simple_jpg,
|
||||||
|
"clash.jpg",
|
||||||
|
simple_pdf,
|
||||||
|
"clash_01.pdf",
|
||||||
|
)
|
||||||
|
|
||||||
self.assertRaisesMessage(ValueError, "would clash with another archive filename", self.performMigration)
|
self.assertRaisesMessage(
|
||||||
|
ValueError,
|
||||||
|
"would clash with another archive filename",
|
||||||
|
self.performMigration,
|
||||||
|
)
|
||||||
|
|
||||||
def test_filename_exists(self):
|
def test_filename_exists(self):
|
||||||
|
|
||||||
Document = self.apps.get_model("documents", "Document")
|
Document = self.apps.get_model("documents", "Document")
|
||||||
|
|
||||||
self.clashA = make_test_document(Document, "clash", "application/pdf", simple_pdf, "clash.pdf", simple_pdf, "clash.pdf")
|
self.clashA = make_test_document(
|
||||||
self.clashB = make_test_document(Document, "clash", "image/jpeg", simple_jpg, "clash.jpg", simple_pdf, "clash_01.pdf")
|
Document,
|
||||||
|
"clash",
|
||||||
|
"application/pdf",
|
||||||
|
simple_pdf,
|
||||||
|
"clash.pdf",
|
||||||
|
simple_pdf,
|
||||||
|
"clash.pdf",
|
||||||
|
)
|
||||||
|
self.clashB = make_test_document(
|
||||||
|
Document,
|
||||||
|
"clash",
|
||||||
|
"image/jpeg",
|
||||||
|
simple_jpg,
|
||||||
|
"clash.jpg",
|
||||||
|
simple_pdf,
|
||||||
|
"clash_01.pdf",
|
||||||
|
)
|
||||||
|
|
||||||
self.assertRaisesMessage(ValueError, "file already exists.", self.performMigration)
|
self.assertRaisesMessage(
|
||||||
|
ValueError, "file already exists.", self.performMigration
|
||||||
|
)
|
||||||
|
@@ -19,10 +19,7 @@ def source_path_before(self):
if self.storage_type == STORAGE_TYPE_GPG:
fname += ".gpg"

-return os.path.join(
-settings.ORIGINALS_DIR,
-fname
-)
+return os.path.join(settings.ORIGINALS_DIR, fname)


def file_type_after(self):
@ -37,30 +34,43 @@ def source_path_after(doc):
|
|||||||
if doc.storage_type == STORAGE_TYPE_GPG:
|
if doc.storage_type == STORAGE_TYPE_GPG:
|
||||||
fname += ".gpg" # pragma: no cover
|
fname += ".gpg" # pragma: no cover
|
||||||
|
|
||||||
return os.path.join(
|
return os.path.join(settings.ORIGINALS_DIR, fname)
|
||||||
settings.ORIGINALS_DIR,
|
|
||||||
fname
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
@override_settings(PASSPHRASE="test")
|
@override_settings(PASSPHRASE="test")
|
||||||
class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
|
class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
|
||||||
|
|
||||||
migrate_from = '1002_auto_20201111_1105'
|
migrate_from = "1002_auto_20201111_1105"
|
||||||
migrate_to = '1003_mime_types'
|
migrate_to = "1003_mime_types"
|
||||||
|
|
||||||
def setUpBeforeMigration(self, apps):
|
def setUpBeforeMigration(self, apps):
|
||||||
Document = apps.get_model("documents", "Document")
|
Document = apps.get_model("documents", "Document")
|
||||||
doc = Document.objects.create(title="test", file_type="pdf", filename="file1.pdf")
|
doc = Document.objects.create(
|
||||||
|
title="test", file_type="pdf", filename="file1.pdf"
|
||||||
|
)
|
||||||
self.doc_id = doc.id
|
self.doc_id = doc.id
|
||||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_before(doc))
|
shutil.copy(
|
||||||
|
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||||
|
source_path_before(doc),
|
||||||
|
)
|
||||||
|
|
||||||
doc2 = Document.objects.create(checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG)
|
doc2 = Document.objects.create(
|
||||||
|
checksum="B", file_type="pdf", storage_type=STORAGE_TYPE_GPG
|
||||||
|
)
|
||||||
self.doc2_id = doc2.id
|
self.doc2_id = doc2.id
|
||||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000004.pdf.gpg"), source_path_before(doc2))
|
shutil.copy(
|
||||||
|
os.path.join(
|
||||||
|
os.path.dirname(__file__),
|
||||||
|
"samples",
|
||||||
|
"documents",
|
||||||
|
"originals",
|
||||||
|
"0000004.pdf.gpg",
|
||||||
|
),
|
||||||
|
source_path_before(doc2),
|
||||||
|
)
|
||||||
|
|
||||||
def testMimeTypesMigrated(self):
|
def testMimeTypesMigrated(self):
|
||||||
Document = self.apps.get_model('documents', 'Document')
|
Document = self.apps.get_model("documents", "Document")
|
||||||
|
|
||||||
doc = Document.objects.get(id=self.doc_id)
|
doc = Document.objects.get(id=self.doc_id)
|
||||||
self.assertEqual(doc.mime_type, "application/pdf")
|
self.assertEqual(doc.mime_type, "application/pdf")
|
||||||
@ -72,17 +82,22 @@ class TestMigrateMimeType(DirectoriesMixin, TestMigrations):
|
|||||||
@override_settings(PASSPHRASE="test")
|
@override_settings(PASSPHRASE="test")
|
||||||
class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations):
|
class TestMigrateMimeTypeBackwards(DirectoriesMixin, TestMigrations):
|
||||||
|
|
||||||
migrate_from = '1003_mime_types'
|
migrate_from = "1003_mime_types"
|
||||||
migrate_to = '1002_auto_20201111_1105'
|
migrate_to = "1002_auto_20201111_1105"
|
||||||
|
|
||||||
def setUpBeforeMigration(self, apps):
|
def setUpBeforeMigration(self, apps):
|
||||||
Document = apps.get_model("documents", "Document")
|
Document = apps.get_model("documents", "Document")
|
||||||
doc = Document.objects.create(title="test", mime_type="application/pdf", filename="file1.pdf")
|
doc = Document.objects.create(
|
||||||
|
title="test", mime_type="application/pdf", filename="file1.pdf"
|
||||||
|
)
|
||||||
self.doc_id = doc.id
|
self.doc_id = doc.id
|
||||||
shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"), source_path_after(doc))
|
shutil.copy(
|
||||||
|
os.path.join(os.path.dirname(__file__), "samples", "simple.pdf"),
|
||||||
|
source_path_after(doc),
|
||||||
|
)
|
||||||
|
|
||||||
def testMimeTypesReverted(self):
|
def testMimeTypesReverted(self):
|
||||||
Document = self.apps.get_model('documents', 'Document')
|
Document = self.apps.get_model("documents", "Document")
|
||||||
|
|
||||||
doc = Document.objects.get(id=self.doc_id)
|
doc = Document.objects.get(id=self.doc_id)
|
||||||
self.assertEqual(doc.file_type, "pdf")
|
self.assertEqual(doc.file_type, "pdf")
|
||||||
|
@@ -3,13 +3,13 @@ from documents.tests.utils import DirectoriesMixin, TestMigrations

class TestMigrateNullCharacters(DirectoriesMixin, TestMigrations):

-migrate_from = '1014_auto_20210228_1614'
-migrate_to = '1015_remove_null_characters'
+migrate_from = "1014_auto_20210228_1614"
+migrate_to = "1015_remove_null_characters"

def setUpBeforeMigration(self, apps):
Document = apps.get_model("documents", "Document")
self.doc = Document.objects.create(content="aaa\0bbb")

def testMimeTypesMigrated(self):
-Document = self.apps.get_model('documents', 'Document')
+Document = self.apps.get_model("documents", "Document")
self.assertNotIn("\0", Document.objects.get(id=self.doc.id).content)
@@ -3,8 +3,8 @@ from documents.tests.utils import DirectoriesMixin, TestMigrations

class TestMigrateTagColor(DirectoriesMixin, TestMigrations):

-migrate_from = '1012_fix_archive_files'
-migrate_to = '1013_migrate_tag_colour'
+migrate_from = "1012_fix_archive_files"
+migrate_to = "1013_migrate_tag_colour"

def setUpBeforeMigration(self, apps):
Tag = apps.get_model("documents", "Tag")
@@ -13,7 +13,7 @@ class TestMigrateTagColor(DirectoriesMixin, TestMigrations):
self.t3_id = Tag.objects.create(name="tag3", colour=5).id

def testMimeTypesMigrated(self):
-Tag = self.apps.get_model('documents', 'Tag')
+Tag = self.apps.get_model("documents", "Tag")
self.assertEqual(Tag.objects.get(id=self.t1_id).color, "#a6cee3")
self.assertEqual(Tag.objects.get(id=self.t2_id).color, "#a6cee3")
self.assertEqual(Tag.objects.get(id=self.t3_id).color, "#fb9a99")
@@ -21,8 +21,8 @@ class TestMigrateTagColor(DirectoriesMixin, TestMigrations):

class TestMigrateTagColorBackwards(DirectoriesMixin, TestMigrations):

-migrate_from = '1013_migrate_tag_colour'
-migrate_to = '1012_fix_archive_files'
+migrate_from = "1013_migrate_tag_colour"
+migrate_to = "1012_fix_archive_files"

def setUpBeforeMigration(self, apps):
Tag = apps.get_model("documents", "Tag")
@@ -31,7 +31,7 @@ class TestMigrateTagColorBackwards(DirectoriesMixin, TestMigrations):
self.t3_id = Tag.objects.create(name="tag3", color="#123456").id

def testMimeTypesReverted(self):
-Tag = self.apps.get_model('documents', 'Tag')
+Tag = self.apps.get_model("documents", "Tag")
self.assertEqual(Tag.objects.get(id=self.t1_id).colour, 1)
self.assertEqual(Tag.objects.get(id=self.t2_id).colour, 9)
self.assertEqual(Tag.objects.get(id=self.t3_id).colour, 1)
@@ -5,7 +5,6 @@ from ..models import Document, Correspondent


class CorrespondentTestCase(TestCase):
-
def test___str__(self):
for s in ("test", "οχι", "test with fun_charÅc'\"terß"):
correspondent = CorrespondentFactory.create(name=s)
@@ -13,7 +12,6 @@ class CorrespondentTestCase(TestCase):


class DocumentTestCase(TestCase):
-
def test_correspondent_deletion_does_not_cascade(self):

self.assertEqual(Correspondent.objects.all().count(), 0)
@ -6,8 +6,14 @@ from unittest import mock
|
|||||||
|
|
||||||
from django.test import TestCase, override_settings
|
from django.test import TestCase, override_settings
|
||||||
|
|
||||||
from documents.parsers import get_parser_class, get_supported_file_extensions, get_default_file_extension, \
|
from documents.parsers import (
|
||||||
get_parser_class_for_mime_type, DocumentParser, is_file_ext_supported
|
get_parser_class,
|
||||||
|
get_supported_file_extensions,
|
||||||
|
get_default_file_extension,
|
||||||
|
get_parser_class_for_mime_type,
|
||||||
|
DocumentParser,
|
||||||
|
is_file_ext_supported,
|
||||||
|
)
|
||||||
from paperless_tesseract.parsers import RasterisedDocumentParser
|
from paperless_tesseract.parsers import RasterisedDocumentParser
|
||||||
from paperless_text.parsers import TextDocumentParser
|
from paperless_text.parsers import TextDocumentParser
|
||||||
|
|
||||||
@ -25,24 +31,26 @@ def fake_magic_from_file(file, mime=False):
|
|||||||
|
|
||||||
@mock.patch("documents.parsers.magic.from_file", fake_magic_from_file)
|
@mock.patch("documents.parsers.magic.from_file", fake_magic_from_file)
|
||||||
class TestParserDiscovery(TestCase):
|
class TestParserDiscovery(TestCase):
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
||||||
def test__get_parser_class_1_parser(self, m, *args):
|
def test__get_parser_class_1_parser(self, m, *args):
|
||||||
class DummyParser(object):
|
class DummyParser(object):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
m.return_value = (
|
m.return_value = (
|
||||||
(None, {"weight": 0, "parser": DummyParser, "mime_types": {"application/pdf": ".pdf"}}),
|
(
|
||||||
|
None,
|
||||||
|
{
|
||||||
|
"weight": 0,
|
||||||
|
"parser": DummyParser,
|
||||||
|
"mime_types": {"application/pdf": ".pdf"},
|
||||||
|
},
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(get_parser_class("doc.pdf"), DummyParser)
|
||||||
get_parser_class("doc.pdf"),
|
|
||||||
DummyParser
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
||||||
def test__get_parser_class_n_parsers(self, m, *args):
|
def test__get_parser_class_n_parsers(self, m, *args):
|
||||||
|
|
||||||
class DummyParser1(object):
|
class DummyParser1(object):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -50,22 +58,31 @@ class TestParserDiscovery(TestCase):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
m.return_value = (
|
m.return_value = (
|
||||||
(None, {"weight": 0, "parser": DummyParser1, "mime_types": {"application/pdf": ".pdf"}}),
|
(
|
||||||
(None, {"weight": 1, "parser": DummyParser2, "mime_types": {"application/pdf": ".pdf"}}),
|
None,
|
||||||
|
{
|
||||||
|
"weight": 0,
|
||||||
|
"parser": DummyParser1,
|
||||||
|
"mime_types": {"application/pdf": ".pdf"},
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
None,
|
||||||
|
{
|
||||||
|
"weight": 1,
|
||||||
|
"parser": DummyParser2,
|
||||||
|
"mime_types": {"application/pdf": ".pdf"},
|
||||||
|
},
|
||||||
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(
|
self.assertEqual(get_parser_class("doc.pdf"), DummyParser2)
|
||||||
get_parser_class("doc.pdf"),
|
|
||||||
DummyParser2
|
|
||||||
)
|
|
||||||
|
|
||||||
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
@mock.patch("documents.parsers.document_consumer_declaration.send")
|
||||||
def test__get_parser_class_0_parsers(self, m, *args):
|
def test__get_parser_class_0_parsers(self, m, *args):
|
||||||
m.return_value = []
|
m.return_value = []
|
||||||
with TemporaryDirectory() as tmpdir:
|
with TemporaryDirectory() as tmpdir:
|
||||||
self.assertIsNone(
|
self.assertIsNone(get_parser_class("doc.pdf"))
|
||||||
get_parser_class("doc.pdf")
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def fake_get_thumbnail(self, path, mimetype, file_name):
|
def fake_get_thumbnail(self, path, mimetype, file_name):
|
||||||
@ -73,13 +90,10 @@ def fake_get_thumbnail(self, path, mimetype, file_name):
|
|||||||
|
|
||||||
|
|
||||||
class TestBaseParser(TestCase):
|
class TestBaseParser(TestCase):
|
||||||
|
|
||||||
def setUp(self) -> None:
|
def setUp(self) -> None:
|
||||||
|
|
||||||
self.scratch = tempfile.mkdtemp()
|
self.scratch = tempfile.mkdtemp()
|
||||||
override_settings(
|
override_settings(SCRATCH_DIR=self.scratch).enable()
|
||||||
SCRATCH_DIR=self.scratch
|
|
||||||
).enable()
|
|
||||||
|
|
||||||
def tearDown(self) -> None:
|
def tearDown(self) -> None:
|
||||||
shutil.rmtree(self.scratch)
|
shutil.rmtree(self.scratch)
|
||||||
@ -101,23 +115,28 @@ class TestBaseParser(TestCase):
|
|||||||
|
|
||||||
|
|
||||||
class TestParserAvailability(TestCase):
|
class TestParserAvailability(TestCase):
|
||||||
|
|
||||||
def test_file_extensions(self):
|
def test_file_extensions(self):
|
||||||
|
|
||||||
for ext in [".pdf", ".jpe", ".jpg", ".jpeg", ".txt", ".csv"]:
|
for ext in [".pdf", ".jpe", ".jpg", ".jpeg", ".txt", ".csv"]:
|
||||||
self.assertIn(ext, get_supported_file_extensions())
|
self.assertIn(ext, get_supported_file_extensions())
|
||||||
self.assertEqual(get_default_file_extension('application/pdf'), ".pdf")
|
self.assertEqual(get_default_file_extension("application/pdf"), ".pdf")
|
||||||
self.assertEqual(get_default_file_extension('image/png'), ".png")
|
self.assertEqual(get_default_file_extension("image/png"), ".png")
|
||||||
self.assertEqual(get_default_file_extension('image/jpeg'), ".jpg")
|
self.assertEqual(get_default_file_extension("image/jpeg"), ".jpg")
|
||||||
self.assertEqual(get_default_file_extension('text/plain'), ".txt")
|
self.assertEqual(get_default_file_extension("text/plain"), ".txt")
|
||||||
self.assertEqual(get_default_file_extension('text/csv'), ".csv")
|
self.assertEqual(get_default_file_extension("text/csv"), ".csv")
|
||||||
self.assertEqual(get_default_file_extension('application/zip'), ".zip")
|
self.assertEqual(get_default_file_extension("application/zip"), ".zip")
|
||||||
self.assertEqual(get_default_file_extension('aasdasd/dgfgf'), "")
|
self.assertEqual(get_default_file_extension("aasdasd/dgfgf"), "")
|
||||||
|
|
||||||
self.assertIsInstance(get_parser_class_for_mime_type('application/pdf')(logging_group=None), RasterisedDocumentParser)
|
self.assertIsInstance(
|
||||||
self.assertIsInstance(get_parser_class_for_mime_type('text/plain')(logging_group=None), TextDocumentParser)
|
get_parser_class_for_mime_type("application/pdf")(logging_group=None),
|
||||||
self.assertEqual(get_parser_class_for_mime_type('text/sdgsdf'), None)
|
RasterisedDocumentParser,
|
||||||
|
)
|
||||||
|
self.assertIsInstance(
|
||||||
|
get_parser_class_for_mime_type("text/plain")(logging_group=None),
|
||||||
|
TextDocumentParser,
|
||||||
|
)
|
||||||
|
self.assertEqual(get_parser_class_for_mime_type("text/sdgsdf"), None)
|
||||||
|
|
||||||
self.assertTrue(is_file_ext_supported('.pdf'))
|
self.assertTrue(is_file_ext_supported(".pdf"))
|
||||||
self.assertFalse(is_file_ext_supported('.hsdfh'))
|
self.assertFalse(is_file_ext_supported(".hsdfh"))
|
||||||
self.assertFalse(is_file_ext_supported(''))
|
self.assertFalse(is_file_ext_supported(""))
|
||||||
|
@@ -13,7 +13,6 @@ from documents.tests.utils import DirectoriesMixin


class TestSanityCheckMessages(TestCase):
-
def test_no_messages(self):
messages = SanityCheckMessages()
self.assertEqual(len(messages), 0)
@@ -23,7 +22,9 @@ class TestSanityCheckMessages(TestCase):
messages.log_messages()
self.assertEqual(len(capture.output), 1)
self.assertEqual(capture.records[0].levelno, logging.INFO)
-self.assertEqual(capture.records[0].message, "Sanity checker detected no issues.")
+self.assertEqual(
+capture.records[0].message, "Sanity checker detected no issues."
+)

def test_info(self):
messages = SanityCheckMessages()
@@ -61,22 +62,58 @@ class TestSanityCheckMessages(TestCase):
         self.assertEqual(capture.records[0].levelno, logging.ERROR)
         self.assertEqual(capture.records[0].message, "Something is seriously wrong")
 
-class TestSanityCheck(DirectoriesMixin, TestCase):
 
+
+class TestSanityCheck(DirectoriesMixin, TestCase):
     def make_test_data(self):
 
         with filelock.FileLock(settings.MEDIA_LOCK):
             # just make sure that the lockfile is present.
-            shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "originals", "0000001.pdf"), os.path.join(self.dirs.originals_dir, "0000001.pdf"))
-            shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "archive", "0000001.pdf"), os.path.join(self.dirs.archive_dir, "0000001.pdf"))
-            shutil.copy(os.path.join(os.path.dirname(__file__), "samples", "documents", "thumbnails", "0000001.png"), os.path.join(self.dirs.thumbnail_dir, "0000001.png"))
+            shutil.copy(
+                os.path.join(
+                    os.path.dirname(__file__),
+                    "samples",
+                    "documents",
+                    "originals",
+                    "0000001.pdf",
+                ),
+                os.path.join(self.dirs.originals_dir, "0000001.pdf"),
+            )
+            shutil.copy(
+                os.path.join(
+                    os.path.dirname(__file__),
+                    "samples",
+                    "documents",
+                    "archive",
+                    "0000001.pdf",
+                ),
+                os.path.join(self.dirs.archive_dir, "0000001.pdf"),
+            )
+            shutil.copy(
+                os.path.join(
+                    os.path.dirname(__file__),
+                    "samples",
+                    "documents",
+                    "thumbnails",
+                    "0000001.png",
+                ),
+                os.path.join(self.dirs.thumbnail_dir, "0000001.png"),
+            )
 
-        return Document.objects.create(title="test", checksum="42995833e01aea9b3edee44bbfdd7ce1", archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b", content="test", pk=1, filename="0000001.pdf", mime_type="application/pdf", archive_filename="0000001.pdf")
+        return Document.objects.create(
+            title="test",
+            checksum="42995833e01aea9b3edee44bbfdd7ce1",
+            archive_checksum="62acb0bcbfbcaa62ca6ad3668e4e404b",
+            content="test",
+            pk=1,
+            filename="0000001.pdf",
+            mime_type="application/pdf",
+            archive_filename="0000001.pdf",
+        )
 
     def assertSanityError(self, messageRegex):
         messages = check_sanity()
         self.assertTrue(messages.has_error())
-        self.assertRegex(messages[0]['message'], messageRegex)
+        self.assertRegex(messages[0]["message"], messageRegex)
 
     def test_no_docs(self):
         self.assertEqual(len(check_sanity()), 0)
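
The shutil.copy and Document.objects.create calls above are far longer than the line limit, and their argument lists do not fit on a single continuation line either, so black prints one argument per line and appends a trailing comma (the "magic trailing comma", which also keeps the call exploded on later runs). A hedged sketch using an abbreviated call of the same shape (black installed and its default mode assumed; the keyword arguments are taken from the hunks in this diff):

    # Hedged illustration, not part of this commit; requires `pip install black`.
    import black

    src = (
        'Document.objects.create(title="test", content="my document", '
        'checksum="wow", added=timezone.now(), created=timezone.now(), '
        "modified=timezone.now())\n"
    )
    # Neither the call nor its argument list fits within 88 columns, so black
    # puts one argument per line and adds a trailing comma, the same shape as
    # the "+" lines above.
    print(black.format_str(src, mode=black.Mode()), end="")
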
@@ -138,7 +175,7 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
         self.assertFalse(messages.has_error())
         self.assertFalse(messages.has_warning())
         self.assertEqual(len(messages), 1)
-        self.assertRegex(messages[0]['message'], "Document .* has no content.")
+        self.assertRegex(messages[0]["message"], "Document .* has no content.")
 
     def test_orphaned_file(self):
         doc = self.make_test_data()
@@ -147,7 +184,7 @@ class TestSanityCheck(DirectoriesMixin, TestCase):
         self.assertFalse(messages.has_error())
         self.assertTrue(messages.has_warning())
         self.assertEqual(len(messages), 1)
-        self.assertRegex(messages[0]['message'], "Orphaned file in media dir")
+        self.assertRegex(messages[0]["message"], "Orphaned file in media dir")
 
     def test_archive_filename_no_checksum(self):
         doc = self.make_test_data()
@@ -7,7 +7,6 @@ from paperless.settings import default_task_workers, default_threads_per_worker
 
 
 class TestSettings(TestCase):
-
     @mock.patch("paperless.settings.multiprocessing.cpu_count")
     def test_single_core(self, cpu_count):
         cpu_count.return_value = 1
@@ -21,7 +20,9 @@ class TestSettings(TestCase):
 
     def test_workers_threads(self):
         for i in range(1, 64):
-            with mock.patch("paperless.settings.multiprocessing.cpu_count") as cpu_count:
+            with mock.patch(
+                "paperless.settings.multiprocessing.cpu_count"
+            ) as cpu_count:
                 cpu_count.return_value = i
 
                 default_workers = default_task_workers()
@@ -12,14 +12,27 @@ from documents.tests.utils import DirectoriesMixin
 
 
 class TestTasks(DirectoriesMixin, TestCase):
-
     def test_index_reindex(self):
-        Document.objects.create(title="test", content="my document", checksum="wow", added=timezone.now(), created=timezone.now(), modified=timezone.now())
+        Document.objects.create(
+            title="test",
+            content="my document",
+            checksum="wow",
+            added=timezone.now(),
+            created=timezone.now(),
+            modified=timezone.now(),
+        )
 
         tasks.index_reindex()
 
     def test_index_optimize(self):
-        Document.objects.create(title="test", content="my document", checksum="wow", added=timezone.now(), created=timezone.now(), modified=timezone.now())
+        Document.objects.create(
+            title="test",
+            content="my document",
+            checksum="wow",
+            added=timezone.now(),
+            created=timezone.now(),
+            modified=timezone.now(),
+        )
 
         tasks.index_optimize()
 
@@ -92,7 +105,9 @@ class TestTasks(DirectoriesMixin, TestCase):
         messages = SanityCheckMessages()
         messages.warning("Some warning")
         m.return_value = messages
-        self.assertEqual(tasks.sanity_check(), "Sanity check exited with warnings. See log.")
+        self.assertEqual(
+            tasks.sanity_check(), "Sanity check exited with warnings. See log."
+        )
         m.assert_called_once()
 
     @mock.patch("documents.tasks.sanity_checker.check_sanity")
@@ -100,11 +115,19 @@ class TestTasks(DirectoriesMixin, TestCase):
         messages = SanityCheckMessages()
         messages.info("Some info")
         m.return_value = messages
-        self.assertEqual(tasks.sanity_check(), "Sanity check exited with infos. See log.")
+        self.assertEqual(
+            tasks.sanity_check(), "Sanity check exited with infos. See log."
+        )
         m.assert_called_once()
 
     def test_bulk_update_documents(self):
-        doc1 = Document.objects.create(title="test", content="my document", checksum="wow", added=timezone.now(),
-                                       created=timezone.now(), modified=timezone.now())
+        doc1 = Document.objects.create(
+            title="test",
+            content="my document",
+            checksum="wow",
+            added=timezone.now(),
+            created=timezone.now(),
+            modified=timezone.now(),
+        )
 
         tasks.bulk_update_documents([doc1.pk])
@@ -4,27 +4,52 @@ from django.test import TestCase
 
 
 class TestViews(TestCase):
-
     def setUp(self) -> None:
         self.user = User.objects.create_user("testuser")
 
     def test_login_redirect(self):
-        response = self.client.get('/')
+        response = self.client.get("/")
         self.assertEqual(response.status_code, 302)
         self.assertEqual(response.url, "/accounts/login/?next=/")
 
     def test_index(self):
         self.client.force_login(self.user)
-        for (language_given, language_actual) in [("", "en-US"), ("en-US", "en-US"), ("de", "de-DE"), ("en", "en-US"), ("en-us", "en-US"), ("fr", "fr-FR"), ("jp", "en-US")]:
+        for (language_given, language_actual) in [
+            ("", "en-US"),
+            ("en-US", "en-US"),
+            ("de", "de-DE"),
+            ("en", "en-US"),
+            ("en-us", "en-US"),
+            ("fr", "fr-FR"),
+            ("jp", "en-US"),
+        ]:
             if language_given:
-                self.client.cookies.load({settings.LANGUAGE_COOKIE_NAME: language_given})
+                self.client.cookies.load(
+                    {settings.LANGUAGE_COOKIE_NAME: language_given}
+                )
             elif settings.LANGUAGE_COOKIE_NAME in self.client.cookies.keys():
                 self.client.cookies.pop(settings.LANGUAGE_COOKIE_NAME)
 
-            response = self.client.get('/', )
+            response = self.client.get(
+                "/",
+            )
             self.assertEqual(response.status_code, 200)
-            self.assertEqual(response.context_data['webmanifest'], f"frontend/{language_actual}/manifest.webmanifest")
-            self.assertEqual(response.context_data['styles_css'], f"frontend/{language_actual}/styles.css")
-            self.assertEqual(response.context_data['runtime_js'], f"frontend/{language_actual}/runtime.js")
-            self.assertEqual(response.context_data['polyfills_js'], f"frontend/{language_actual}/polyfills.js")
-            self.assertEqual(response.context_data['main_js'], f"frontend/{language_actual}/main.js")
+            self.assertEqual(
+                response.context_data["webmanifest"],
+                f"frontend/{language_actual}/manifest.webmanifest",
+            )
+            self.assertEqual(
+                response.context_data["styles_css"],
+                f"frontend/{language_actual}/styles.css",
+            )
+            self.assertEqual(
+                response.context_data["runtime_js"],
+                f"frontend/{language_actual}/runtime.js",
+            )
+            self.assertEqual(
+                response.context_data["polyfills_js"],
+                f"frontend/{language_actual}/polyfills.js",
+            )
+            self.assertEqual(
+                response.context_data["main_js"], f"frontend/{language_actual}/main.js"
+            )
@@ -42,8 +42,7 @@ def setup_directories():
         LOGGING_DIR=dirs.logging_dir,
         INDEX_DIR=dirs.index_dir,
         MODEL_FILE=os.path.join(dirs.data_dir, "classification_model.pickle"),
-        MEDIA_LOCK=os.path.join(dirs.media_dir, "media.lock")
-
+        MEDIA_LOCK=os.path.join(dirs.media_dir, "media.lock"),
     )
     dirs.settings_override.enable()
 
@@ -70,7 +69,6 @@ def paperless_environment():
 
 
 class DirectoriesMixin:
-
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.dirs = None
@@ -85,7 +83,6 @@ class DirectoriesMixin:
 
 
 class TestMigrations(TransactionTestCase):
-
     @property
     def app(self):
         return apps.get_containing_app_config(type(self).__module__).name
@@ -97,8 +94,11 @@ class TestMigrations(TransactionTestCase):
     def setUp(self):
         super(TestMigrations, self).setUp()
 
-        assert self.migrate_from and self.migrate_to, \
-            "TestCase '{}' must define migrate_from and migrate_to properties".format(type(self).__name__)
+        assert (
+            self.migrate_from and self.migrate_to
+        ), "TestCase '{}' must define migrate_from and migrate_to properties".format(
+            type(self).__name__
+        )
         self.migrate_from = [(self.app, self.migrate_from)]
         self.migrate_to = [(self.app, self.migrate_to)]
         executor = MigrationExecutor(connection)
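
The final hunk shows one more rule: black avoids backslash continuations, so the two-line assert in TestMigrations.setUp is rewritten with its condition wrapped in parentheses. A hedged sketch with a slightly shortened message (black installed and its default mode assumed; the attribute names come from the hunk above):

    # Hedged illustration, not part of this commit; requires `pip install black`.
    import black

    src = (
        "assert self.migrate_from and self.migrate_to, \\\n"
        "    'TestCase must define migrate_from and migrate_to properties'\n"
    )
    # black drops the backslash and, because the statement is too long for one
    # line, wraps the condition in parentheses, roughly the shape of the "+"
    # lines in the hunk above.
    print(black.format_str(src, mode=black.Mode()), end="")
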
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user