mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 19:17:13 -05:00 
			
		
		
		
	Merge branch 'tikitu-refactor-file-info-extraction'
This commit is contained in:
		
						commit
						11e1b9783e
					
				@ -19,12 +19,11 @@ from PIL import Image
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
from django.conf import settings
 | 
					from django.conf import settings
 | 
				
			||||||
from django.utils import timezone
 | 
					from django.utils import timezone
 | 
				
			||||||
from django.template.defaultfilters import slugify
 | 
					 | 
				
			||||||
from pyocr.tesseract import TesseractError
 | 
					from pyocr.tesseract import TesseractError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from paperless.db import GnuPG
 | 
					from paperless.db import GnuPG
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .models import Correspondent, Tag, Document, Log
 | 
					from .models import Tag, Document, Log, FileInfo
 | 
				
			||||||
from .languages import ISO639
 | 
					from .languages import ISO639
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -54,19 +53,6 @@ class Consumer(object):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
 | 
					    DEFAULT_OCR_LANGUAGE = settings.OCR_LANGUAGE
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    REGEX_TITLE = re.compile(
 | 
					 | 
				
			||||||
        r"^.*/(.*)\.(pdf|jpe?g|png|gif|tiff)$",
 | 
					 | 
				
			||||||
        flags=re.IGNORECASE
 | 
					 | 
				
			||||||
    )
 | 
					 | 
				
			||||||
    REGEX_CORRESPONDENT_TITLE = re.compile(
 | 
					 | 
				
			||||||
        r"^.*/(.+) - (.*)\.(pdf|jpe?g|png|gif|tiff)$",
 | 
					 | 
				
			||||||
        flags=re.IGNORECASE
 | 
					 | 
				
			||||||
    )
 | 
					 | 
				
			||||||
    REGEX_CORRESPONDENT_TITLE_TAGS = re.compile(
 | 
					 | 
				
			||||||
        r"^.*/(.*) - (.*) - ([a-z0-9\-,]*)\.(pdf|jpe?g|png|gif|tiff)$",
 | 
					 | 
				
			||||||
        flags=re.IGNORECASE
 | 
					 | 
				
			||||||
    )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def __init__(self):
 | 
					    def __init__(self):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.logger = logging.getLogger(__name__)
 | 
					        self.logger = logging.getLogger(__name__)
 | 
				
			||||||
@ -105,7 +91,7 @@ class Consumer(object):
 | 
				
			|||||||
            if not os.path.isfile(doc):
 | 
					            if not os.path.isfile(doc):
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if not re.match(self.REGEX_TITLE, doc):
 | 
					            if not re.match(FileInfo.REGEXES["title"], doc):
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            if doc in self._ignore:
 | 
					            if doc in self._ignore:
 | 
				
			||||||
@ -269,72 +255,20 @@ class Consumer(object):
 | 
				
			|||||||
        # Strip out excess white space to allow matching to go smoother
 | 
					        # Strip out excess white space to allow matching to go smoother
 | 
				
			||||||
        return re.sub(r"\s+", " ", r)
 | 
					        return re.sub(r"\s+", " ", r)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _guess_attributes_from_name(self, parseable):
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
        We use a crude naming convention to make handling the correspondent,
 | 
					 | 
				
			||||||
        title, and tags easier:
 | 
					 | 
				
			||||||
          "<correspondent> - <title> - <tags>.<suffix>"
 | 
					 | 
				
			||||||
          "<correspondent> - <title>.<suffix>"
 | 
					 | 
				
			||||||
          "<title>.<suffix>"
 | 
					 | 
				
			||||||
        """
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        def get_correspondent(correspondent_name):
 | 
					 | 
				
			||||||
            return Correspondent.objects.get_or_create(
 | 
					 | 
				
			||||||
                name=correspondent_name,
 | 
					 | 
				
			||||||
                defaults={"slug": slugify(correspondent_name)}
 | 
					 | 
				
			||||||
            )[0]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        def get_tags(tags):
 | 
					 | 
				
			||||||
            r = []
 | 
					 | 
				
			||||||
            for t in tags.split(","):
 | 
					 | 
				
			||||||
                r.append(
 | 
					 | 
				
			||||||
                    Tag.objects.get_or_create(slug=t, defaults={"name": t})[0])
 | 
					 | 
				
			||||||
            return tuple(r)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        def get_suffix(suffix):
 | 
					 | 
				
			||||||
            suffix = suffix.lower()
 | 
					 | 
				
			||||||
            if suffix == "jpeg":
 | 
					 | 
				
			||||||
                return "jpg"
 | 
					 | 
				
			||||||
            return suffix
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # First attempt: "<correspondent> - <title> - <tags>.<suffix>"
 | 
					 | 
				
			||||||
        m = re.match(self.REGEX_CORRESPONDENT_TITLE_TAGS, parseable)
 | 
					 | 
				
			||||||
        if m:
 | 
					 | 
				
			||||||
            return (
 | 
					 | 
				
			||||||
                get_correspondent(m.group(1)),
 | 
					 | 
				
			||||||
                m.group(2),
 | 
					 | 
				
			||||||
                get_tags(m.group(3)),
 | 
					 | 
				
			||||||
                get_suffix(m.group(4))
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # Second attempt: "<correspondent> - <title>.<suffix>"
 | 
					 | 
				
			||||||
        m = re.match(self.REGEX_CORRESPONDENT_TITLE, parseable)
 | 
					 | 
				
			||||||
        if m:
 | 
					 | 
				
			||||||
            return (
 | 
					 | 
				
			||||||
                get_correspondent(m.group(1)),
 | 
					 | 
				
			||||||
                m.group(2),
 | 
					 | 
				
			||||||
                (),
 | 
					 | 
				
			||||||
                get_suffix(m.group(3))
 | 
					 | 
				
			||||||
            )
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        # That didn't work, so we assume correspondent and tags are None
 | 
					 | 
				
			||||||
        m = re.match(self.REGEX_TITLE, parseable)
 | 
					 | 
				
			||||||
        return None, m.group(1), (), get_suffix(m.group(2))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def _store(self, text, doc, thumbnail):
 | 
					    def _store(self, text, doc, thumbnail):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        sender, title, tags, file_type = self._guess_attributes_from_name(doc)
 | 
					        file_info = FileInfo.from_path(doc)
 | 
				
			||||||
        relevant_tags = set(list(Tag.match_all(text)) + list(tags))
 | 
					        relevant_tags = set(list(Tag.match_all(text)) + list(file_info.tags))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        stats = os.stat(doc)
 | 
					        stats = os.stat(doc)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        self.log("debug", "Saving record to database")
 | 
					        self.log("debug", "Saving record to database")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        document = Document.objects.create(
 | 
					        document = Document.objects.create(
 | 
				
			||||||
            correspondent=sender,
 | 
					            correspondent=file_info.correspondent,
 | 
				
			||||||
            title=title,
 | 
					            title=file_info.title,
 | 
				
			||||||
            content=text,
 | 
					            content=text,
 | 
				
			||||||
            file_type=file_type,
 | 
					            file_type=file_info.extension,
 | 
				
			||||||
            created=timezone.make_aware(
 | 
					            created=timezone.make_aware(
 | 
				
			||||||
                datetime.datetime.fromtimestamp(stats.st_mtime)),
 | 
					                datetime.datetime.fromtimestamp(stats.st_mtime)),
 | 
				
			||||||
            modified=timezone.make_aware(
 | 
					            modified=timezone.make_aware(
 | 
				
			||||||
 | 
				
			|||||||
@ -96,11 +96,16 @@ class Command(Renderable, BaseCommand):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    @staticmethod
 | 
					    @staticmethod
 | 
				
			||||||
    def _get_legacy_file_name(doc):
 | 
					    def _get_legacy_file_name(doc):
 | 
				
			||||||
        if doc.correspondent and doc.title:
 | 
					
 | 
				
			||||||
            tags = ",".join([t.slug for t in doc.tags.all()])
 | 
					        if not doc.correspondent and not doc.title:
 | 
				
			||||||
            if tags:
 | 
					            return os.path.basename(doc.source_path)
 | 
				
			||||||
                return "{} - {} - {}.{}".format(
 | 
					
 | 
				
			||||||
                    doc.correspondent, doc.title, tags, doc.file_type)
 | 
					        created = doc.created.strftime("%Y%m%d%H%M%SZ")
 | 
				
			||||||
            return "{} - {}.{}".format(
 | 
					        tags = ",".join([t.slug for t in doc.tags.all()])
 | 
				
			||||||
                doc.correspondent, doc.title, doc.file_type)
 | 
					
 | 
				
			||||||
        return os.path.basename(doc.source_path)
 | 
					        if tags:
 | 
				
			||||||
 | 
					            return "{} - {} - {} - {}.{}".format(
 | 
				
			||||||
 | 
					                created, doc.correspondent, doc.title, tags, doc.file_type)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        return "{} - {} - {}.{}".format(
 | 
				
			||||||
 | 
					            created, doc.correspondent, doc.title, doc.file_type)
 | 
				
			||||||
 | 
				
			|||||||
@ -1,8 +1,11 @@
 | 
				
			|||||||
 | 
					import dateutil.parser
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
import os
 | 
					import os
 | 
				
			||||||
import re
 | 
					import re
 | 
				
			||||||
import uuid
 | 
					import uuid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from collections import OrderedDict
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from django.conf import settings
 | 
					from django.conf import settings
 | 
				
			||||||
from django.core.urlresolvers import reverse
 | 
					from django.core.urlresolvers import reverse
 | 
				
			||||||
from django.db import models
 | 
					from django.db import models
 | 
				
			||||||
@ -250,3 +253,136 @@ class Log(models.Model):
 | 
				
			|||||||
            self.group = uuid.uuid4()
 | 
					            self.group = uuid.uuid4()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        models.Model.save(self, *args, **kwargs)
 | 
					        models.Model.save(self, *args, **kwargs)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class FileInfo(object):
					    """
					    Document attributes guessed from a file name.

					    ``from_path`` tries every pattern in ``REGEXES`` in order against the
					    base name of a path and builds an instance from the first one that
					    matches.  Attributes that the matching pattern does not capture keep
					    their constructor defaults (``None``, or ``()`` for ``tags``).
					    """

					    # This epic regex *almost* worked for our needs, so I'm keeping it here for
					    # posterity, in the hopes that we might find a way to make it work one day.
					    ALMOST_REGEX = re.compile(
					        r"^((?P<date>\d\d\d\d\d\d\d\d\d\d\d\d\d\dZ){separator})?"
					        r"((?P<correspondent>{non_separated_word}+){separator})??"
					        r"(?P<title>{non_separated_word}+)"
					        r"({separator}(?P<tags>[a-z,0-9-]+))?"
					        r"\.(?P<extension>[a-zA-Z.-]+)$".format(
					            separator=r"\s+-\s+",
					            non_separated_word=r"([\w,. ]|([^\s]-))"
					        )
					    )

					    # Ordered from most to least specific: from_path() stops at the first
					    # pattern that matches, so the catch-all "title" pattern must stay last.
					    REGEXES = OrderedDict([
					        ("created-correspondent-title-tags", re.compile(
					            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
					            r"(?P<correspondent>.*) - "
					            r"(?P<title>.*) - "
					            r"(?P<tags>[a-z0-9\-,]*)"
					            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
					            flags=re.IGNORECASE
					        )),
					        ("created-title-tags", re.compile(
					            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
					            r"(?P<title>.*) - "
					            r"(?P<tags>[a-z0-9\-,]*)"
					            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
					            flags=re.IGNORECASE
					        )),
					        ("created-correspondent-title", re.compile(
					            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
					            r"(?P<correspondent>.*) - "
					            r"(?P<title>.*)"
					            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
					            flags=re.IGNORECASE
					        )),
					        ("created-title", re.compile(
					            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
					            r"(?P<title>.*)"
					            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
					            flags=re.IGNORECASE
					        )),
					        ("correspondent-title-tags", re.compile(
					            r"(?P<correspondent>.*) - "
					            r"(?P<title>.*) - "
					            r"(?P<tags>[a-z0-9\-,]*)"
					            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
					            flags=re.IGNORECASE
					        )),
					        ("correspondent-title", re.compile(
					            r"(?P<correspondent>.*) - "
					            r"(?P<title>.*)?"
					            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
					            flags=re.IGNORECASE
					        )),
					        ("title", re.compile(
					            r"(?P<title>.*)"
					            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff)$",
					            flags=re.IGNORECASE
					        ))
					    ])

					    def __init__(self, created=None, correspondent=None, title=None, tags=(),
					                 extension=None):

					        self.created = created
					        self.title = title
					        self.extension = extension
					        self.correspondent = correspondent
					        self.tags = tags

					    @classmethod
					    def _get_created(cls, created):
					        # Drop the trailing "Z", right-pad a date-only stamp (8 digits) with
					        # zeros up to 14 digits, then put the "Z" back before parsing.
					        return dateutil.parser.parse("{:0<14}Z".format(created[:-1]))

					    @classmethod
					    def _get_correspondent(cls, name):
					        # An empty capture (file name starting with " - ") means "none".
					        if not name:
					            return None
					        return Correspondent.objects.get_or_create(name=name, defaults={
					            "slug": slugify(name)
					        })[0]

					    @classmethod
					    def _get_title(cls, title):
					        return title

					    @classmethod
					    def _get_tags(cls, tags):
					        # The tags group may legitimately match "" (e.g. "a - b - .pdf"), and
					        # stray commas produce "" entries; skip those so that no Tag with an
					        # empty slug is ever created.
					        return tuple(
					            Tag.objects.get_or_create(slug=t, defaults={"name": t})[0]
					            for t in tags.split(",")
					            if t
					        )

					    @classmethod
					    def _get_extension(cls, extension):
					        # Lower-case the extension and fold "jpeg" into "jpg".
					        r = extension.lower()
					        if r == "jpeg":
					            return "jpg"
					        return r

					    @classmethod
					    def _mangle_property(cls, properties, name):
					        # Replace the raw captured string for ``name`` (if that group exists
					        # in the matching pattern) with the result of ``cls._get_<name>``.
					        if name in properties:
					            properties[name] = getattr(cls, "_get_{}".format(name))(
					                properties[name]
					            )

					    @classmethod
					    def from_path(cls, path):
					        """
					        We use a crude naming convention to make handling the created date,
					        correspondent, title, and tags easier.  Only the base name of
					        ``path`` is examined, and these layouts are tried in this order:
					          "<created> - <correspondent> - <title> - <tags>.<suffix>"
					          "<created> - <title> - <tags>.<suffix>"
					          "<created> - <correspondent> - <title>.<suffix>"
					          "<created> - <title>.<suffix>"
					          "<correspondent> - <title> - <tags>.<suffix>"
					          "<correspondent> - <title>.<suffix>"
					          "<title>.<suffix>"
					        where <created> is 8 or 14 digits followed by "Z".

					        Returns a ``FileInfo`` built from the first layout that matches, or
					        ``None`` when the name matches none of them (for example because the
					        suffix is not one of pdf, jpg/jpeg, png, gif or tiff).
					        """

					        # The base name does not change between patterns; compute it once.
					        name = os.path.basename(path)

					        for regex in cls.REGEXES.values():
					            m = regex.match(name)
					            if m:
					                properties = m.groupdict()
					                cls._mangle_property(properties, "created")
					                cls._mangle_property(properties, "correspondent")
					                cls._mangle_property(properties, "title")
					                cls._mangle_property(properties, "tags")
					                cls._mangle_property(properties, "extension")
					                return cls(**properties)

					        return None
 | 
				
			||||||
 | 
				
			|||||||
@ -1,29 +1,36 @@
 | 
				
			|||||||
from django.test import TestCase
 | 
					from django.test import TestCase
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ..consumer import Consumer
 | 
					from ..models import Document, FileInfo
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class TestAttachment(TestCase):
 | 
					class TestAttachment(TestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    TAGS = ("tag1", "tag2", "tag3")
 | 
					    TAGS = ("tag1", "tag2", "tag3")
 | 
				
			||||||
    CONSUMER = Consumer()
 | 
					    EXTENSIONS = (
 | 
				
			||||||
    SUFFIXES = (
 | 
					 | 
				
			||||||
        "pdf", "png", "jpg", "jpeg", "gif",
 | 
					        "pdf", "png", "jpg", "jpeg", "gif",
 | 
				
			||||||
        "PDF", "PNG", "JPG", "JPEG", "GIF",
 | 
					        "PDF", "PNG", "JPG", "JPEG", "GIF",
 | 
				
			||||||
        "PdF", "PnG", "JpG", "JPeG", "GiF",
 | 
					        "PdF", "PnG", "JpG", "JPeG", "GiF",
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _test_guess_attributes_from_name(self, path, sender, title, tags):
        """
        Parse ``path`` once per entry in ``EXTENSIONS`` and check the result.

        ``path`` is a format string with a single ``{}`` slot for the
        extension.  ``sender`` is the expected correspondent name (falsy when
        no correspondent is expected), ``title`` the expected title and
        ``tags`` the expected tuple of tag slugs.
        """
        for ext in self.EXTENSIONS:
            filename = path.format(ext)
            parsed = FileInfo.from_path(filename)

            if not sender:
                self.assertIsNone(parsed.correspondent, filename)
            else:
                self.assertEqual(parsed.correspondent.name, sender, filename)

            self.assertEqual(parsed.title, title, filename)

            slugs = tuple([tag.slug for tag in parsed.tags])
            self.assertEqual(slugs, tags, filename)

            # "jpeg" is expected to be normalised to "jpg"; everything else is
            # only lower-cased.
            lowered = ext.lower()
            expected_extension = "jpg" if lowered == "jpeg" else lowered
            self.assertEqual(parsed.extension, expected_extension, filename)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_guess_attributes_from_name0(self):
 | 
					    def test_guess_attributes_from_name0(self):
 | 
				
			||||||
        self._test_guess_attributes_from_name(
 | 
					        self._test_guess_attributes_from_name(
 | 
				
			||||||
@ -92,3 +99,206 @@ class TestAttachment(TestCase):
 | 
				
			|||||||
            "Τιτλε",
 | 
					            "Τιτλε",
 | 
				
			||||||
            self.TAGS
 | 
					            self.TAGS
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_guess_attributes_from_name_when_correspondent_empty(self):
 | 
				
			||||||
 | 
					        self._test_guess_attributes_from_name(
 | 
				
			||||||
 | 
					            '/path/to/ - weird empty correspondent but should not break.{}',
 | 
				
			||||||
 | 
					            None,
 | 
				
			||||||
 | 
					            'weird empty correspondent but should not break',
 | 
				
			||||||
 | 
					            ()
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_guess_attributes_from_name_when_title_starts_with_dash(self):
 | 
				
			||||||
 | 
					        self._test_guess_attributes_from_name(
 | 
				
			||||||
 | 
					            '/path/to/- weird but should not break.{}',
 | 
				
			||||||
 | 
					            None,
 | 
				
			||||||
 | 
					            '- weird but should not break',
 | 
				
			||||||
 | 
					            ()
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_guess_attributes_from_name_when_title_ends_with_dash(self):
 | 
				
			||||||
 | 
					        self._test_guess_attributes_from_name(
 | 
				
			||||||
 | 
					            '/path/to/weird but should not break -.{}',
 | 
				
			||||||
 | 
					            None,
 | 
				
			||||||
 | 
					            'weird but should not break -',
 | 
				
			||||||
 | 
					            ()
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_guess_attributes_from_name_when_title_is_empty(self):
 | 
				
			||||||
 | 
					        self._test_guess_attributes_from_name(
 | 
				
			||||||
 | 
					            '/path/to/weird correspondent but should not break - .{}',
 | 
				
			||||||
 | 
					            'weird correspondent but should not break',
 | 
				
			||||||
 | 
					            '',
 | 
				
			||||||
 | 
					            ()
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Permutations(TestCase):
					    """
					    Feed ``FileInfo.from_path`` every combination of the sample values
					    below, for each supported file-name layout, and check that each
					    attribute comes back as it was put in.
					    """

					    valid_dates = (
					        "20150102030405Z",
					        "20150102Z",
					    )
					    valid_correspondents = [
					        "timmy",
					        "Dr. McWheelie",
					        "Dash Gor-don",
					        "ο Θερμαστής",
					        ""
					    ]
					    valid_titles = ["title", "Title w Spaces", "Title a-dash", "Τίτλος", ""]
					    valid_tags = ["tag", "tig,tag", "tag1,tag2,tag-3"]
					    valid_extensions = ["pdf", "png", "jpg", "jpeg", "gif"]

					    def _test_guessed_attributes(self, filename, created=None,
					                                 correspondent=None, title=None,
					                                 extension=None, tags=None):
					        """
					        Parse ``filename`` and compare each attribute with what is expected.

					        ``created`` is the raw stamp used in the file name (only year, month
					        and day are compared); ``tags`` is the raw comma-separated string.
					        Arguments left at ``None`` assert that nothing was extracted.
					        """
					        parsed = FileInfo.from_path(filename)

					        # Created
					        if created is not None:
					            year, month, day = created[:4], created[4:6], created[6:8]
					            self.assertEqual(parsed.created.year, int(year), filename)
					            self.assertEqual(parsed.created.month, int(month), filename)
					            self.assertEqual(parsed.created.day, int(day), filename)
					        else:
					            self.assertIsNone(parsed.created, filename)

					        # Correspondent
					        if not correspondent:
					            self.assertEqual(parsed.correspondent, None, filename)
					        else:
					            self.assertEqual(
					                parsed.correspondent.name, correspondent, filename)

					        # Title
					        self.assertEqual(parsed.title, title, filename)

					        # Tags
					        if tags is not None:
					            found_slugs = [tag.slug for tag in parsed.tags]
					            self.assertEqual(found_slugs, tags.split(','), filename)
					        else:
					            self.assertEqual(parsed.tags, (), filename)

					        # Extension ("jpeg" is expected to come back as "jpg")
					        expected_extension = 'jpg' if extension == 'jpeg' else extension
					        self.assertEqual(parsed.extension, expected_extension, filename)

					    def _check_template(self, template, **spec):
					        # Render the file name from ``spec`` and expect to get ``spec`` back.
					        self._test_guessed_attributes(template.format(**spec), **spec)

					    def test_just_title(self):
					        combos = [
					            (title, extension)
					            for title in self.valid_titles
					            for extension in self.valid_extensions
					        ]
					        for title, extension in combos:
					            self._check_template(
					                '/path/to/{title}.{extension}',
					                title=title, extension=extension)

					    def test_title_and_correspondent(self):
					        combos = [
					            (correspondent, title, extension)
					            for correspondent in self.valid_correspondents
					            for title in self.valid_titles
					            for extension in self.valid_extensions
					        ]
					        for correspondent, title, extension in combos:
					            self._check_template(
					                '/path/to/{correspondent} - {title}.{extension}',
					                correspondent=correspondent, title=title,
					                extension=extension)

					    def test_title_and_correspondent_and_tags(self):
					        combos = [
					            (correspondent, title, tags, extension)
					            for correspondent in self.valid_correspondents
					            for title in self.valid_titles
					            for tags in self.valid_tags
					            for extension in self.valid_extensions
					        ]
					        for correspondent, title, tags, extension in combos:
					            self._check_template(
					                '/path/to/{correspondent} - {title} - {tags}.{extension}',
					                correspondent=correspondent, title=title,
					                tags=tags, extension=extension)

					    def test_created_and_correspondent_and_title_and_tags(self):
					        template = ("/path/to/{created} - "
					                    "{correspondent} - "
					                    "{title} - "
					                    "{tags}"
					                    ".{extension}")
					        combos = [
					            (created, correspondent, title, tags, extension)
					            for created in self.valid_dates
					            for correspondent in self.valid_correspondents
					            for title in self.valid_titles
					            for tags in self.valid_tags
					            for extension in self.valid_extensions
					        ]
					        for created, correspondent, title, tags, extension in combos:
					            self._check_template(
					                template,
					                created=created, correspondent=correspondent,
					                title=title, tags=tags, extension=extension)

					    def test_created_and_correspondent_and_title(self):
					        template = ("/path/to/{created} - "
					                    "{correspondent} - "
					                    "{title}"
					                    ".{extension}")
					        # Titles that are already all lower-case are left out: such a title
					        # looks like a tag, and those cases can't be accommodated.
					        combos = [
					            (created, correspondent, title, extension)
					            for created in self.valid_dates
					            for correspondent in self.valid_correspondents
					            for title in self.valid_titles
					            if title.lower() != title
					            for extension in self.valid_extensions
					        ]
					        for created, correspondent, title, extension in combos:
					            self._check_template(
					                template,
					                created=created, correspondent=correspondent,
					                title=title, extension=extension)

					    def test_created_and_title(self):
					        template = ("/path/to/{created} - "
					                    "{title}"
					                    ".{extension}")
					        combos = [
					            (created, title, extension)
					            for created in self.valid_dates
					            for title in self.valid_titles
					            for extension in self.valid_extensions
					        ]
					        for created, title, extension in combos:
					            self._check_template(
					                template,
					                created=created, title=title, extension=extension)

					    def test_created_and_title_and_tags(self):
					        template = ("/path/to/{created} - "
					                    "{title} - "
					                    "{tags}"
					                    ".{extension}")
					        combos = [
					            (created, title, tags, extension)
					            for created in self.valid_dates
					            for title in self.valid_titles
					            for tags in self.valid_tags
					            for extension in self.valid_extensions
					        ]
					        for created, title, tags, extension in combos:
					            self._check_template(
					                template,
					                created=created, title=title, tags=tags,
					                extension=extension)
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user