mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-04 03:27:12 -05:00 
			
		
		
		
	mime type handling
This commit is contained in:
		
							parent
							
								
									bd45a804a7
								
							
						
					
					
						commit
						41650f20f4
					
				@ -50,7 +50,7 @@ class DocumentTypeAdmin(admin.ModelAdmin):
 | 
				
			|||||||
class DocumentAdmin(admin.ModelAdmin):
 | 
					class DocumentAdmin(admin.ModelAdmin):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    search_fields = ("correspondent__name", "title", "content", "tags__name")
 | 
					    search_fields = ("correspondent__name", "title", "content", "tags__name")
 | 
				
			||||||
    readonly_fields = ("added", "file_type", "storage_type", "filename")
 | 
					    readonly_fields = ("added", "mime_type", "storage_type", "filename")
 | 
				
			||||||
    list_display = (
 | 
					    list_display = (
 | 
				
			||||||
        "title",
 | 
					        "title",
 | 
				
			||||||
        "created",
 | 
					        "created",
 | 
				
			||||||
@ -58,8 +58,7 @@ class DocumentAdmin(admin.ModelAdmin):
 | 
				
			|||||||
        "correspondent",
 | 
					        "correspondent",
 | 
				
			||||||
        "tags_",
 | 
					        "tags_",
 | 
				
			||||||
        "archive_serial_number",
 | 
					        "archive_serial_number",
 | 
				
			||||||
        "document_type",
 | 
					        "document_type"
 | 
				
			||||||
        "filename"
 | 
					 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
    list_filter = (
 | 
					    list_filter = (
 | 
				
			||||||
        "document_type",
 | 
					        "document_type",
 | 
				
			||||||
 | 
				
			|||||||
@ -2,8 +2,8 @@ import datetime
 | 
				
			|||||||
import hashlib
 | 
					import hashlib
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import re
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import magic
 | 
				
			||||||
from django.conf import settings
 | 
					from django.conf import settings
 | 
				
			||||||
from django.db import transaction
 | 
					from django.db import transaction
 | 
				
			||||||
from django.utils import timezone
 | 
					from django.utils import timezone
 | 
				
			||||||
@ -13,7 +13,7 @@ from .classifier import DocumentClassifier, IncompatibleClassifierVersionError
 | 
				
			|||||||
from .file_handling import generate_filename, create_source_path_directory
 | 
					from .file_handling import generate_filename, create_source_path_directory
 | 
				
			||||||
from .loggers import LoggingMixin
 | 
					from .loggers import LoggingMixin
 | 
				
			||||||
from .models import Document, FileInfo, Correspondent, DocumentType, Tag
 | 
					from .models import Document, FileInfo, Correspondent, DocumentType, Tag
 | 
				
			||||||
from .parsers import ParseError, get_parser_class
 | 
					from .parsers import ParseError, get_parser_class_for_mime_type
 | 
				
			||||||
from .signals import (
 | 
					from .signals import (
 | 
				
			||||||
    document_consumption_finished,
 | 
					    document_consumption_finished,
 | 
				
			||||||
    document_consumption_started
 | 
					    document_consumption_started
 | 
				
			||||||
@ -51,12 +51,6 @@ class Consumer(LoggingMixin):
 | 
				
			|||||||
                "Consumption directory {} does not exist".format(
 | 
					                "Consumption directory {} does not exist".format(
 | 
				
			||||||
                    settings.CONSUMPTION_DIR))
 | 
					                    settings.CONSUMPTION_DIR))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def pre_check_regex(self):
 | 
					 | 
				
			||||||
        if not re.match(FileInfo.REGEXES["title"], self.filename):
 | 
					 | 
				
			||||||
            raise ConsumerError(
 | 
					 | 
				
			||||||
                "Filename {} does not seem to be safe to "
 | 
					 | 
				
			||||||
                "consume".format(self.filename))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def pre_check_duplicate(self):
 | 
					    def pre_check_duplicate(self):
 | 
				
			||||||
        with open(self.path, "rb") as f:
 | 
					        with open(self.path, "rb") as f:
 | 
				
			||||||
            checksum = hashlib.md5(f.read()).hexdigest()
 | 
					            checksum = hashlib.md5(f.read()).hexdigest()
 | 
				
			||||||
@ -100,18 +94,19 @@ class Consumer(LoggingMixin):
 | 
				
			|||||||
        self.pre_check_file_exists()
 | 
					        self.pre_check_file_exists()
 | 
				
			||||||
        self.pre_check_consumption_dir()
 | 
					        self.pre_check_consumption_dir()
 | 
				
			||||||
        self.pre_check_directories()
 | 
					        self.pre_check_directories()
 | 
				
			||||||
        self.pre_check_regex()
 | 
					 | 
				
			||||||
        self.pre_check_duplicate()
 | 
					        self.pre_check_duplicate()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.log("info", "Consuming {}".format(self.filename))
 | 
					        self.log("info", "Consuming {}".format(self.filename))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Determine the parser class.
 | 
					        # Determine the parser class.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        parser_class = get_parser_class(self.filename)
 | 
					        mime_type = magic.from_file(self.path, mime=True)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        parser_class = get_parser_class_for_mime_type(mime_type)
 | 
				
			||||||
        if not parser_class:
 | 
					        if not parser_class:
 | 
				
			||||||
            raise ConsumerError("No parsers abvailable for {}".format(self.filename))
 | 
					            raise ConsumerError("No parsers abvailable for {}".format(self.filename))
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            self.log("debug", "Parser: {}".format(parser_class.__name__))
 | 
					            self.log("debug", "Parser: {} based on mime type {}".format(parser_class.__name__, mime_type))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # Notify all listeners that we're going to do some work.
 | 
					        # Notify all listeners that we're going to do some work.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -162,7 +157,8 @@ class Consumer(LoggingMixin):
 | 
				
			|||||||
                # store the document.
 | 
					                # store the document.
 | 
				
			||||||
                document = self._store(
 | 
					                document = self._store(
 | 
				
			||||||
                    text=text,
 | 
					                    text=text,
 | 
				
			||||||
                    date=date
 | 
					                    date=date,
 | 
				
			||||||
 | 
					                    mime_type=mime_type
 | 
				
			||||||
                )
 | 
					                )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                # If we get here, it was successful. Proceed with post-consume
 | 
					                # If we get here, it was successful. Proceed with post-consume
 | 
				
			||||||
@ -197,7 +193,7 @@ class Consumer(LoggingMixin):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        return document
 | 
					        return document
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _store(self, text, date):
 | 
					    def _store(self, text, date, mime_type):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        # If someone gave us the original filename, use it instead of doc.
 | 
					        # If someone gave us the original filename, use it instead of doc.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -220,7 +216,7 @@ class Consumer(LoggingMixin):
 | 
				
			|||||||
                correspondent=file_info.correspondent,
 | 
					                correspondent=file_info.correspondent,
 | 
				
			||||||
                title=file_info.title,
 | 
					                title=file_info.title,
 | 
				
			||||||
                content=text,
 | 
					                content=text,
 | 
				
			||||||
                file_type=file_info.extension,
 | 
					                mime_type=mime_type,
 | 
				
			||||||
                checksum=hashlib.md5(f.read()).hexdigest(),
 | 
					                checksum=hashlib.md5(f.read()).hexdigest(),
 | 
				
			||||||
                created=created,
 | 
					                created=created,
 | 
				
			||||||
                modified=created,
 | 
					                modified=created,
 | 
				
			||||||
 | 
				
			|||||||
@ -91,9 +91,9 @@ def generate_filename(document):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    # Always append the primary key to guarantee uniqueness of filename
 | 
					    # Always append the primary key to guarantee uniqueness of filename
 | 
				
			||||||
    if len(path) > 0:
 | 
					    if len(path) > 0:
 | 
				
			||||||
        filename = "%s-%07i.%s" % (path, document.pk, document.file_type)
 | 
					        filename = "%s-%07i%s" % (path, document.pk, document.file_type)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        filename = "%07i.%s" % (document.pk, document.file_type)
 | 
					        filename = "%07i%s" % (document.pk, document.file_type)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # Append .gpg for encrypted files
 | 
					    # Append .gpg for encrypted files
 | 
				
			||||||
    if document.storage_type == document.STORAGE_TYPE_GPG:
 | 
					    if document.storage_type == document.STORAGE_TYPE_GPG:
 | 
				
			||||||
 | 
				
			|||||||
@ -127,8 +127,8 @@ class Command(Renderable, BaseCommand):
 | 
				
			|||||||
        tags = ",".join([t.slug for t in doc.tags.all()])
 | 
					        tags = ",".join([t.slug for t in doc.tags.all()])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if tags:
 | 
					        if tags:
 | 
				
			||||||
            return "{} - {} - {} - {}.{}".format(
 | 
					            return "{} - {} - {} - {}{}".format(
 | 
				
			||||||
                created, doc.correspondent, doc.title, tags, doc.file_type)
 | 
					                created, doc.correspondent, doc.title, tags, doc.file_type)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return "{} - {} - {}.{}".format(
 | 
					        return "{} - {} - {}{}".format(
 | 
				
			||||||
            created, doc.correspondent, doc.title, doc.file_type)
 | 
					            created, doc.correspondent, doc.title, doc.file_type)
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										50
									
								
								src/documents/migrations/1003_mime_types.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								src/documents/migrations/1003_mime_types.py
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,50 @@
 | 
				
			|||||||
 | 
					# Generated by Django 3.1.3 on 2020-11-20 11:21
 | 
				
			||||||
 | 
					import os
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					import magic
 | 
				
			||||||
 | 
					from django.conf import settings
 | 
				
			||||||
 | 
					from django.db import migrations, models
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def source_path(self):
					    """Return the on-disk path of a document's original file.

					    This is a module-level copy of the model's path logic so the data
					    migration can locate files while the ``file_type`` column still
					    exists. ``self`` is a document instance (normally a historical
					    model obtained through ``apps.get_model``).

					    Returns:
					        The path below ``settings.ORIGINALS_DIR``: the stored
					        ``filename`` when one is set, otherwise the zero-padded primary
					        key plus ``file_type`` (and ``.gpg`` for GPG storage).
					    """
					    if self.filename:
					        fname = str(self.filename)
					    else:
					        fname = "{:07}.{}".format(self.pk, self.file_type)
					        # Historical models only carry fields, not class-level constants,
					        # so ``self.STORAGE_TYPE_GPG`` may be missing here. Fall back to
					        # the literal value the model uses for GPG storage.
					        gpg_storage_type = getattr(self, "STORAGE_TYPE_GPG", "gpg")
					        if self.storage_type == gpg_storage_type:
					            fname += ".gpg"

					    return os.path.join(
					        settings.ORIGINALS_DIR,
					        fname
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def add_mime_types(apps, schema_editor):
					    """Backfill ``mime_type`` for every existing document.

					    Runs between adding ``mime_type`` and removing ``file_type``: each
					    document's original file is located with ``source_path`` and its
					    mime type is detected from the file contents with libmagic.

					    Args:
					        apps: Historical app registry supplied by the migration runner.
					        schema_editor: Unused; required by the ``RunPython`` signature.
					    """
					    Document = apps.get_model("documents", "Document")

					    # NOTE(review): for GPG-encrypted originals this inspects the
					    # encrypted file, so the detected type presumably describes the
					    # container rather than the document - confirm before relying on it.
					    # NOTE(review): an original that is missing on disk makes
					    # magic.from_file raise, which aborts the migration.
					    for document in Document.objects.all():
					        document.mime_type = magic.from_file(
					            source_path(document), mime=True)
					        # Only the new column changed; do not rewrite the whole row.
					        document.save(update_fields=["mime_type"])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Migration(migrations.Migration):
					    """Replace the ``file_type`` column of ``Document`` with ``mime_type``.

					    Steps, in order: add ``mime_type`` (with a one-off ``"-"`` default
					    used only to fill existing rows), backfill it from the files on disk
					    via ``add_mime_types``, then drop ``file_type``.
					    """

					    dependencies = [("documents", "1002_auto_20201111_1105")]

					    operations = [
					        migrations.AddField(
					            model_name="document",
					            name="mime_type",
					            field=models.CharField(
					                max_length=256,
					                editable=False,
					                default="-",
					            ),
					            preserve_default=False,
					        ),
					        # The backfill must run while ``file_type`` still exists, because
					        # the original file paths are derived from it.
					        migrations.RunPython(add_mime_types),
					        migrations.RemoveField(model_name="document", name="file_type"),
					    ]
 | 
				
			||||||
@ -1,6 +1,7 @@
 | 
				
			|||||||
# coding=utf-8
 | 
					# coding=utf-8
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
 | 
					import mimetypes
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
from collections import OrderedDict
 | 
					from collections import OrderedDict
 | 
				
			||||||
@ -113,18 +114,6 @@ class DocumentType(MatchingModel):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
class Document(models.Model):
 | 
					class Document(models.Model):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # TODO: why do we need an explicit list
 | 
					 | 
				
			||||||
    TYPE_PDF = "pdf"
 | 
					 | 
				
			||||||
    TYPE_PNG = "png"
 | 
					 | 
				
			||||||
    TYPE_JPG = "jpg"
 | 
					 | 
				
			||||||
    TYPE_GIF = "gif"
 | 
					 | 
				
			||||||
    TYPE_TIF = "tiff"
 | 
					 | 
				
			||||||
    TYPE_TXT = "txt"
 | 
					 | 
				
			||||||
    TYPE_CSV = "csv"
 | 
					 | 
				
			||||||
    TYPE_MD = "md"
 | 
					 | 
				
			||||||
    TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,
 | 
					 | 
				
			||||||
             TYPE_TXT, TYPE_CSV, TYPE_MD)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    STORAGE_TYPE_UNENCRYPTED = "unencrypted"
 | 
					    STORAGE_TYPE_UNENCRYPTED = "unencrypted"
 | 
				
			||||||
    STORAGE_TYPE_GPG = "gpg"
 | 
					    STORAGE_TYPE_GPG = "gpg"
 | 
				
			||||||
    STORAGE_TYPES = (
 | 
					    STORAGE_TYPES = (
 | 
				
			||||||
@ -156,10 +145,9 @@ class Document(models.Model):
 | 
				
			|||||||
                  "primarily used for searching."
 | 
					                  "primarily used for searching."
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    file_type = models.CharField(
 | 
					    mime_type = models.CharField(
 | 
				
			||||||
        max_length=4,
 | 
					        max_length=256,
 | 
				
			||||||
        editable=False,
 | 
					        editable=False
 | 
				
			||||||
        choices=tuple([(t, t.upper()) for t in TYPES])
 | 
					 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    tags = models.ManyToManyField(
 | 
					    tags = models.ManyToManyField(
 | 
				
			||||||
@ -223,7 +211,7 @@ class Document(models.Model):
 | 
				
			|||||||
        if self.filename:
 | 
					        if self.filename:
 | 
				
			||||||
            fname = str(self.filename)
 | 
					            fname = str(self.filename)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            fname = "{:07}.{}".format(self.pk, self.file_type)
 | 
					            fname = "{:07}{}".format(self.pk, self.file_type)
 | 
				
			||||||
            if self.storage_type == self.STORAGE_TYPE_GPG:
 | 
					            if self.storage_type == self.STORAGE_TYPE_GPG:
 | 
				
			||||||
                fname += ".gpg"
 | 
					                fname += ".gpg"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -238,7 +226,11 @@ class Document(models.Model):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    @property
 | 
					    @property
 | 
				
			||||||
    def file_name(self):
 | 
					    def file_name(self):
 | 
				
			||||||
        return slugify(str(self)) + "." + self.file_type
 | 
					        return slugify(str(self)) + self.file_type
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @property
					    def file_type(self):
					        """File extension (with leading dot) guessed from ``mime_type``.

					        Returns:
					            The extension reported by ``mimetypes.guess_extension``, or an
					            empty string when the mime type is unknown.
					        """
					        # guess_extension() returns None for unrecognised types. Callers
					        # concatenate or format this value straight into file names, so
					        # None would either raise or put "None" into the name.
					        return mimetypes.guess_extension(str(self.mime_type)) or ""
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @property
 | 
					    @property
 | 
				
			||||||
    def thumbnail_path(self):
 | 
					    def thumbnail_path(self):
 | 
				
			||||||
 | 
				
			|||||||
@ -6,6 +6,7 @@ import subprocess
 | 
				
			|||||||
import tempfile
 | 
					import tempfile
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import dateparser
 | 
					import dateparser
 | 
				
			||||||
 | 
					import magic
 | 
				
			||||||
from django.conf import settings
 | 
					from django.conf import settings
 | 
				
			||||||
from django.utils import timezone
 | 
					from django.utils import timezone
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -37,10 +38,11 @@ DATE_REGEX = re.compile(
 | 
				
			|||||||
logger = logging.getLogger(__name__)
 | 
					logger = logging.getLogger(__name__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_parser_class(doc):
 | 
					def is_mime_type_supported(mime_type):
 | 
				
			||||||
    """
 | 
					    return get_parser_class_for_mime_type(mime_type) is not None
 | 
				
			||||||
    Determine the appropriate parser class based on the file
 | 
					
 | 
				
			||||||
    """
 | 
					
 | 
				
			||||||
 | 
					def get_parser_class_for_mime_type(mime_type):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    options = []
 | 
					    options = []
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -48,9 +50,9 @@ def get_parser_class(doc):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    for response in document_consumer_declaration.send(None):
 | 
					    for response in document_consumer_declaration.send(None):
 | 
				
			||||||
        parser_declaration = response[1]
 | 
					        parser_declaration = response[1]
 | 
				
			||||||
        parser_test = parser_declaration["test"]
 | 
					        supported_mime_types = parser_declaration["mime_types"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if parser_test(doc):
 | 
					        if mime_type in supported_mime_types:
 | 
				
			||||||
            options.append(parser_declaration)
 | 
					            options.append(parser_declaration)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if not options:
 | 
					    if not options:
 | 
				
			||||||
@ -61,6 +63,16 @@ def get_parser_class(doc):
 | 
				
			|||||||
        options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
 | 
					        options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_parser_class(path):
					    """
					    Look up the parser class for the file at ``path``.

					    The file's mime type is detected with libmagic and the lookup is
					    delegated to ``get_parser_class_for_mime_type``.
					    """
					    detected_mime_type = magic.from_file(path, mime=True)
					    return get_parser_class_for_mime_type(detected_mime_type)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def run_convert(input_file, output_file, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
 | 
					def run_convert(input_file, output_file, density=None, scale=None, alpha=None, strip=False, trim=False, type=None, depth=None, extra=None, logging_group=None):
 | 
				
			||||||
    environment = os.environ.copy()
 | 
					    environment = os.environ.copy()
 | 
				
			||||||
    if settings.CONVERT_MEMORY_LIMIT:
 | 
					    if settings.CONVERT_MEMORY_LIMIT:
 | 
				
			||||||
 | 
				
			|||||||
@ -91,7 +91,7 @@ class DocumentSerializer(serializers.ModelSerializer):
 | 
				
			|||||||
            "document_type_id",
 | 
					            "document_type_id",
 | 
				
			||||||
            "title",
 | 
					            "title",
 | 
				
			||||||
            "content",
 | 
					            "content",
 | 
				
			||||||
            "file_type",
 | 
					            "mime_type",
 | 
				
			||||||
            "tags",
 | 
					            "tags",
 | 
				
			||||||
            "tags_id",
 | 
					            "tags_id",
 | 
				
			||||||
            "checksum",
 | 
					            "checksum",
 | 
				
			||||||
 | 
				
			|||||||
@ -45,7 +45,7 @@ class DocumentApiTest(APITestCase):
 | 
				
			|||||||
        dt = DocumentType.objects.create(name="dt", pk=63)
 | 
					        dt = DocumentType.objects.create(name="dt", pk=63)
 | 
				
			||||||
        tag = Tag.objects.create(name="t", pk=85)
 | 
					        tag = Tag.objects.create(name="t", pk=85)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc = Document.objects.create(title="WOW", content="the content", correspondent=c, document_type=dt, checksum="123")
 | 
					        doc = Document.objects.create(title="WOW", content="the content", correspondent=c, document_type=dt, checksum="123", mime_type="application/pdf")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc.tags.add(tag)
 | 
					        doc.tags.add(tag)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -95,7 +95,7 @@ class DocumentApiTest(APITestCase):
 | 
				
			|||||||
        with open(filename, "wb") as f:
 | 
					        with open(filename, "wb") as f:
 | 
				
			||||||
            f.write(content)
 | 
					            f.write(content)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc = Document.objects.create(title="none", filename=os.path.basename(filename), file_type="pdf")
 | 
					        doc = Document.objects.create(title="none", filename=os.path.basename(filename), mime_type="application/pdf")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with open(os.path.join(self.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb") as f:
 | 
					        with open(os.path.join(self.thumbnail_dir, "{:07d}.png".format(doc.pk)), "wb") as f:
 | 
				
			||||||
            f.write(content_thumbnail)
 | 
					            f.write(content_thumbnail)
 | 
				
			||||||
@ -117,7 +117,7 @@ class DocumentApiTest(APITestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def test_document_actions_not_existing_file(self):
 | 
					    def test_document_actions_not_existing_file(self):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc = Document.objects.create(title="none", filename=os.path.basename("asd"), file_type="pdf")
 | 
					        doc = Document.objects.create(title="none", filename=os.path.basename("asd"), mime_type="application/pdf")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        response = self.client.get('/api/documents/{}/download/'.format(doc.pk))
 | 
					        response = self.client.get('/api/documents/{}/download/'.format(doc.pk))
 | 
				
			||||||
        self.assertEqual(response.status_code, 404)
 | 
					        self.assertEqual(response.status_code, 404)
 | 
				
			||||||
@ -130,9 +130,9 @@ class DocumentApiTest(APITestCase):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    def test_document_filters(self):
 | 
					    def test_document_filters(self):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        doc1 = Document.objects.create(title="none1", checksum="A")
 | 
					        doc1 = Document.objects.create(title="none1", checksum="A", mime_type="application/pdf")
 | 
				
			||||||
        doc2 = Document.objects.create(title="none2", checksum="B")
 | 
					        doc2 = Document.objects.create(title="none2", checksum="B", mime_type="application/pdf")
 | 
				
			||||||
        doc3 = Document.objects.create(title="none3", checksum="C")
 | 
					        doc3 = Document.objects.create(title="none3", checksum="C", mime_type="application/pdf")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True)
 | 
					        tag_inbox = Tag.objects.create(name="t1", is_inbox_tag=True)
 | 
				
			||||||
        tag_2 = Tag.objects.create(name="t2")
 | 
					        tag_2 = Tag.objects.create(name="t2")
 | 
				
			||||||
 | 
				
			|||||||
@ -437,6 +437,18 @@ class FaultyParser(DocumentParser):
 | 
				
			|||||||
        raise ParseError("Does not compute.")
 | 
					        raise ParseError("Does not compute.")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def fake_magic_from_file(file, mime=False):
					    """Test stand-in for ``magic.from_file`` that only looks at the name.

					    With ``mime`` set, names ending in ``.pdf`` are reported as
					    ``application/pdf`` and everything else as ``unknown``. Without it, a
					    fixed descriptive string is returned.
					    """
					    if not mime:
					        return "A verbose string that describes the contents of the file"

					    _, extension = os.path.splitext(file)
					    return "application/pdf" if extension == ".pdf" else "unknown"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@mock.patch("documents.consumer.magic.from_file", fake_magic_from_file)
 | 
				
			||||||
class TestConsumer(TestCase):
 | 
					class TestConsumer(TestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def make_dummy_parser(self, path, logging_group):
 | 
					    def make_dummy_parser(self, path, logging_group):
 | 
				
			||||||
@ -462,7 +474,7 @@ class TestConsumer(TestCase):
 | 
				
			|||||||
        m = patcher.start()
 | 
					        m = patcher.start()
 | 
				
			||||||
        m.return_value = [(None, {
 | 
					        m.return_value = [(None, {
 | 
				
			||||||
            "parser": self.make_dummy_parser,
 | 
					            "parser": self.make_dummy_parser,
 | 
				
			||||||
            "test": lambda _: True,
 | 
					            "mime_types": ["application/pdf"],
 | 
				
			||||||
            "weight": 0
 | 
					            "weight": 0
 | 
				
			||||||
        })]
 | 
					        })]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -592,7 +604,7 @@ class TestConsumer(TestCase):
 | 
				
			|||||||
    def testFaultyParser(self, m):
 | 
					    def testFaultyParser(self, m):
 | 
				
			||||||
        m.return_value = [(None, {
 | 
					        m.return_value = [(None, {
 | 
				
			||||||
            "parser": self.make_faulty_parser,
 | 
					            "parser": self.make_faulty_parser,
 | 
				
			||||||
            "test": lambda _: True,
 | 
					            "mime_types": ["application/pdf"],
 | 
				
			||||||
            "weight": 0
 | 
					            "weight": 0
 | 
				
			||||||
        })]
 | 
					        })]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
@ -13,9 +13,12 @@ class TestDocument(TestCase):
 | 
				
			|||||||
            title="Title",
 | 
					            title="Title",
 | 
				
			||||||
            content="content",
 | 
					            content="content",
 | 
				
			||||||
            checksum="checksum",
 | 
					            checksum="checksum",
 | 
				
			||||||
 | 
					            mime_type="application/pdf"
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        file_path = document.source_path
 | 
					        file_path = document.source_path
 | 
				
			||||||
        thumb_path = document.thumbnail_path
 | 
					        thumb_path = document.thumbnail_path
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        with mock.patch("documents.signals.handlers.os.unlink") as mock_unlink:
 | 
					        with mock.patch("documents.signals.handlers.os.unlink") as mock_unlink:
 | 
				
			||||||
            document.delete()
 | 
					            document.delete()
 | 
				
			||||||
            mock_unlink.assert_any_call(file_path)
 | 
					            mock_unlink.assert_any_call(file_path)
 | 
				
			||||||
 | 
				
			|||||||
@ -31,7 +31,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="")
 | 
				
			||||||
    def test_generate_source_filename(self):
 | 
					    def test_generate_source_filename(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -44,7 +44,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
 | 
				
			||||||
    def test_file_renaming(self):
 | 
					    def test_file_renaming(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -81,7 +81,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
 | 
				
			||||||
    def test_file_renaming_missing_permissions(self):
 | 
					    def test_file_renaming_missing_permissions(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -111,10 +111,10 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
 | 
				
			||||||
    def test_file_renaming_database_error(self):
 | 
					    def test_file_renaming_database_error(self):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        document1 = Document.objects.create(file_type="pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA")
 | 
					        document1 = Document.objects.create(mime_type="application/pdf", storage_type=Document.STORAGE_TYPE_UNENCRYPTED, checksum="AAAAA")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.checksum = "BBBBB"
 | 
					        document.checksum = "BBBBB"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
@ -149,7 +149,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
 | 
				
			||||||
    def test_document_delete(self):
 | 
					    def test_document_delete(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -170,7 +170,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
 | 
				
			||||||
    def test_document_delete_nofile(self):
 | 
					    def test_document_delete_nofile(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -179,7 +179,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}")
 | 
				
			||||||
    def test_directory_not_empty(self):
 | 
					    def test_directory_not_empty(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -206,7 +206,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
 | 
				
			||||||
    def test_tags_with_underscore(self):
 | 
					    def test_tags_with_underscore(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -222,7 +222,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
 | 
				
			||||||
    def test_tags_with_dash(self):
 | 
					    def test_tags_with_dash(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -238,7 +238,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[type]}")
 | 
				
			||||||
    def test_tags_malformed(self):
 | 
					    def test_tags_malformed(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -254,7 +254,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[0]}")
 | 
				
			||||||
    def test_tags_all(self):
 | 
					    def test_tags_all(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -269,7 +269,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="{tags[1]}")
 | 
				
			||||||
    def test_tags_out_of_bounds(self):
 | 
					    def test_tags_out_of_bounds(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -284,7 +284,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
 | 
					    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{correspondent}/{correspondent}")
 | 
				
			||||||
    def test_nested_directory_cleanup(self):
 | 
					    def test_nested_directory_cleanup(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
        document.save()
 | 
					        document.save()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -309,7 +309,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    def test_format_none(self):
 | 
					    def test_format_none(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.pk = 1
 | 
					        document.pk = 1
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.assertEqual(generate_filename(document), "0000001.pdf")
 | 
					        self.assertEqual(generate_filename(document), "0000001.pdf")
 | 
				
			||||||
@ -335,7 +335,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    def test_invalid_format(self):
 | 
					    def test_invalid_format(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.pk = 1
 | 
					        document.pk = 1
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.assertEqual(generate_filename(document), "0000001.pdf")
 | 
					        self.assertEqual(generate_filename(document), "0000001.pdf")
 | 
				
			||||||
@ -344,7 +344,7 @@ class TestDate(TestCase):
 | 
				
			|||||||
    def test_invalid_format_key(self):
 | 
					    def test_invalid_format_key(self):
 | 
				
			||||||
        document = Document()
 | 
					        document = Document()
 | 
				
			||||||
        document.pk = 1
 | 
					        document.pk = 1
 | 
				
			||||||
        document.file_type = "pdf"
 | 
					        document.mime_type = "application/pdf"
 | 
				
			||||||
        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
					        document.storage_type = Document.STORAGE_TYPE_UNENCRYPTED
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.assertEqual(generate_filename(document), "0000001.pdf")
 | 
					        self.assertEqual(generate_filename(document), "0000001.pdf")
 | 
				
			||||||
 | 
				
			|||||||
@ -213,7 +213,7 @@ class TestDocumentConsumptionFinishedSignal(TestCase):
 | 
				
			|||||||
        TestCase.setUp(self)
 | 
					        TestCase.setUp(self)
 | 
				
			||||||
        User.objects.create_user(username='test_consumer', password='12345')
 | 
					        User.objects.create_user(username='test_consumer', password='12345')
 | 
				
			||||||
        self.doc_contains = Document.objects.create(
 | 
					        self.doc_contains = Document.objects.create(
 | 
				
			||||||
            content="I contain the keyword.", file_type="pdf")
 | 
					            content="I contain the keyword.", mime_type="application/pdf")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_tag_applied_any(self):
 | 
					    def test_tag_applied_any(self):
 | 
				
			||||||
        t1 = Tag.objects.create(
 | 
					        t1 = Tag.objects.create(
 | 
				
			||||||
 | 
				
			|||||||
@ -1,3 +1,4 @@
 | 
				
			|||||||
 | 
					import os
 | 
				
			||||||
from tempfile import TemporaryDirectory
 | 
					from tempfile import TemporaryDirectory
 | 
				
			||||||
from unittest import mock
 | 
					from unittest import mock
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -5,7 +6,18 @@ from django.test import TestCase
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from documents.parsers import get_parser_class
 | 
					from documents.parsers import get_parser_class
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def fake_magic_from_file(file, mime=False):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if mime:
 | 
				
			||||||
 | 
					        if os.path.splitext(file)[1] == ".pdf":
 | 
				
			||||||
 | 
					            return "application/pdf"
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            return "unknown"
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        return "A verbose string that describes the contents of the file"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@mock.patch("documents.parsers.magic.from_file", fake_magic_from_file)
 | 
				
			||||||
class TestParserDiscovery(TestCase):
 | 
					class TestParserDiscovery(TestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    @mock.patch("documents.parsers.document_consumer_declaration.send")
 | 
					    @mock.patch("documents.parsers.document_consumer_declaration.send")
 | 
				
			||||||
@ -14,7 +26,7 @@ class TestParserDiscovery(TestCase):
 | 
				
			|||||||
            pass
 | 
					            pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        m.return_value = (
 | 
					        m.return_value = (
 | 
				
			||||||
            (None, {"weight": 0, "parser": DummyParser, "test": lambda _: True}),
 | 
					            (None, {"weight": 0, "parser": DummyParser, "mime_types": ["application/pdf"]}),
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.assertEqual(
 | 
					        self.assertEqual(
 | 
				
			||||||
@ -32,8 +44,8 @@ class TestParserDiscovery(TestCase):
 | 
				
			|||||||
            pass
 | 
					            pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        m.return_value = (
 | 
					        m.return_value = (
 | 
				
			||||||
            (None, {"weight": 0, "parser": DummyParser1, "test": lambda _: True}),
 | 
					            (None, {"weight": 0, "parser": DummyParser1, "mime_types": ["application/pdf"]}),
 | 
				
			||||||
            (None, {"weight": 1, "parser": DummyParser2, "test": lambda _: True}),
 | 
					            (None, {"weight": 1, "parser": DummyParser2, "mime_types": ["application/pdf"]}),
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.assertEqual(
 | 
					        self.assertEqual(
 | 
				
			||||||
 | 
				
			|||||||
@ -104,18 +104,6 @@ class DocumentViewSet(RetrieveModelMixin,
 | 
				
			|||||||
        return super(DocumentViewSet, self).destroy(request, *args, **kwargs)
 | 
					        return super(DocumentViewSet, self).destroy(request, *args, **kwargs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def file_response(self, pk, disposition):
 | 
					    def file_response(self, pk, disposition):
 | 
				
			||||||
        # TODO: this should not be necessary here.
 | 
					 | 
				
			||||||
        content_types = {
 | 
					 | 
				
			||||||
            Document.TYPE_PDF: "application/pdf",
 | 
					 | 
				
			||||||
            Document.TYPE_PNG: "image/png",
 | 
					 | 
				
			||||||
            Document.TYPE_JPG: "image/jpeg",
 | 
					 | 
				
			||||||
            Document.TYPE_GIF: "image/gif",
 | 
					 | 
				
			||||||
            Document.TYPE_TIF: "image/tiff",
 | 
					 | 
				
			||||||
            Document.TYPE_CSV: "text/csv",
 | 
					 | 
				
			||||||
            Document.TYPE_MD: "text/markdown",
 | 
					 | 
				
			||||||
            Document.TYPE_TXT: "text/plain"
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        doc = Document.objects.get(id=pk)
 | 
					        doc = Document.objects.get(id=pk)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if doc.storage_type == Document.STORAGE_TYPE_UNENCRYPTED:
 | 
					        if doc.storage_type == Document.STORAGE_TYPE_UNENCRYPTED:
 | 
				
			||||||
@ -123,7 +111,7 @@ class DocumentViewSet(RetrieveModelMixin,
 | 
				
			|||||||
        else:
 | 
					        else:
 | 
				
			||||||
            file_handle = GnuPG.decrypted(doc.source_file)
 | 
					            file_handle = GnuPG.decrypted(doc.source_file)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        response = HttpResponse(file_handle, content_type=content_types[doc.file_type])
 | 
					        response = HttpResponse(file_handle, content_type=doc.mime_type)
 | 
				
			||||||
        response["Content-Disposition"] = '{}; filename="{}"'.format(
 | 
					        response["Content-Disposition"] = '{}; filename="{}"'.format(
 | 
				
			||||||
            disposition, doc.file_name)
 | 
					            disposition, doc.file_name)
 | 
				
			||||||
        return response
 | 
					        return response
 | 
				
			||||||
 | 
				
			|||||||
@ -10,6 +10,7 @@ from imap_tools import MailBox, MailBoxUnencrypted, AND, MailMessageFlags, \
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from documents.loggers import LoggingMixin
 | 
					from documents.loggers import LoggingMixin
 | 
				
			||||||
from documents.models import Correspondent
 | 
					from documents.models import Correspondent
 | 
				
			||||||
 | 
					from documents.parsers import is_mime_type_supported
 | 
				
			||||||
from paperless_mail.models import MailAccount, MailRule
 | 
					from paperless_mail.models import MailAccount, MailRule
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -249,8 +250,7 @@ class MailAccountHandler(LoggingMixin):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
            title = get_title(message, att, rule)
 | 
					            title = get_title(message, att, rule)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            # TODO: check with parsers what files types are supported
 | 
					            if is_mime_type_supported(att.content_type):
 | 
				
			||||||
            if att.content_type == 'application/pdf':
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
                os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
 | 
					                os.makedirs(settings.SCRATCH_DIR, exist_ok=True)
 | 
				
			||||||
                _, temp_filename = tempfile.mkstemp(prefix="paperless-mail-", dir=settings.SCRATCH_DIR)
 | 
					                _, temp_filename = tempfile.mkstemp(prefix="paperless-mail-", dir=settings.SCRATCH_DIR)
 | 
				
			||||||
 | 
				
			|||||||
@ -1,5 +1,3 @@
 | 
				
			|||||||
import re
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from .parsers import RasterisedDocumentParser
 | 
					from .parsers import RasterisedDocumentParser
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -7,12 +5,9 @@ def tesseract_consumer_declaration(sender, **kwargs):
 | 
				
			|||||||
    return {
 | 
					    return {
 | 
				
			||||||
        "parser": RasterisedDocumentParser,
 | 
					        "parser": RasterisedDocumentParser,
 | 
				
			||||||
        "weight": 0,
 | 
					        "weight": 0,
 | 
				
			||||||
        "test": tesseract_consumer_test
 | 
					        "mime_types": [
 | 
				
			||||||
 | 
					            "application/pdf",
 | 
				
			||||||
 | 
					            "image/jpeg",
 | 
				
			||||||
 | 
					            "image/png"
 | 
				
			||||||
 | 
					        ]
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
MATCHING_FILES = re.compile(r"^.*\.(pdf|jpe?g|gif|png|tiff?|pnm|bmp)$")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def tesseract_consumer_test(doc):
 | 
					 | 
				
			||||||
    return MATCHING_FILES.match(doc.lower())
 | 
					 | 
				
			||||||
 | 
				
			|||||||
@ -1,36 +0,0 @@
 | 
				
			|||||||
from django.test import TestCase
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from paperless_tesseract.signals import tesseract_consumer_test
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
class SignalsTestCase(TestCase):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def test_test_handles_various_file_names_true(self):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        prefixes = (
 | 
					 | 
				
			||||||
            "doc", "My Document", "Μυ Γρεεκ Δοψθμεντ", "Doc -with - tags",
 | 
					 | 
				
			||||||
            "A document with a . in it", "Doc with -- in it"
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
        suffixes = (
 | 
					 | 
				
			||||||
            "pdf", "jpg", "jpeg", "gif", "png", "tiff", "tif", "pnm", "bmp",
 | 
					 | 
				
			||||||
            "PDF", "JPG", "JPEG", "GIF", "PNG", "TIFF", "TIF", "PNM", "BMP",
 | 
					 | 
				
			||||||
            "pDf", "jPg", "jpEg", "gIf", "pNg", "tIff", "tIf", "pNm", "bMp",
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for prefix in prefixes:
 | 
					 | 
				
			||||||
            for suffix in suffixes:
 | 
					 | 
				
			||||||
                name = "{}.{}".format(prefix, suffix)
 | 
					 | 
				
			||||||
                self.assertTrue(tesseract_consumer_test(name))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def test_test_handles_various_file_names_false(self):
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        prefixes = ("doc",)
 | 
					 | 
				
			||||||
        suffixes = ("txt", "markdown", "",)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        for prefix in prefixes:
 | 
					 | 
				
			||||||
            for suffix in suffixes:
 | 
					 | 
				
			||||||
                name = "{}.{}".format(prefix, suffix)
 | 
					 | 
				
			||||||
                self.assertFalse(tesseract_consumer_test(name))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        self.assertFalse(tesseract_consumer_test(""))
 | 
					 | 
				
			||||||
        self.assertFalse(tesseract_consumer_test("doc"))
 | 
					 | 
				
			||||||
@ -1,5 +1,3 @@
 | 
				
			|||||||
import re
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from .parsers import TextDocumentParser
 | 
					from .parsers import TextDocumentParser
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -7,12 +5,8 @@ def text_consumer_declaration(sender, **kwargs):
 | 
				
			|||||||
    return {
 | 
					    return {
 | 
				
			||||||
        "parser": TextDocumentParser,
 | 
					        "parser": TextDocumentParser,
 | 
				
			||||||
        "weight": 10,
 | 
					        "weight": 10,
 | 
				
			||||||
        "test": text_consumer_test
 | 
					        "mime_types": [
 | 
				
			||||||
 | 
					            "text/plain",
 | 
				
			||||||
 | 
					            "text/comma-separated-values"
 | 
				
			||||||
 | 
					        ]
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
MATCHING_FILES = re.compile(r"^.*\.(te?xt|md|csv)$")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def text_consumer_test(doc):
 | 
					 | 
				
			||||||
    return MATCHING_FILES.match(doc.lower())
 | 
					 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user