mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-04 03:27:12 -05:00 
			
		
		
		
	Merge branch 'ovv-consumer-cli-args'
This commit is contained in:
		
						commit
						4a25e9655c
					
				@ -80,6 +80,13 @@ you'll need to have it start in the background -- something you'll need to
 | 
			
		||||
figure out for your own system.  To get you started though, there are Systemd
 | 
			
		||||
service files in the ``scripts`` directory.
 | 
			
		||||
 | 
			
		||||
Some command line arguments are available to customize the behavior of the
 | 
			
		||||
consumer. By default it will use ``/etc/paperless.conf`` values. Display the
 | 
			
		||||
help with:
 | 
			
		||||
 | 
			
		||||
.. code-block:: shell-session
 | 
			
		||||
 | 
			
		||||
    $ /path/to/paperless/src/manage.py document_consumer --help
 | 
			
		||||
 | 
			
		||||
.. _utilities-exporter:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -32,31 +32,31 @@ class Consumer:
 | 
			
		||||
      5. Delete the document and image(s)
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    SCRATCH = settings.SCRATCH_DIR
 | 
			
		||||
    CONSUME = settings.CONSUMPTION_DIR
 | 
			
		||||
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
    def __init__(self, consume=settings.CONSUMPTION_DIR,
 | 
			
		||||
                 scratch=settings.SCRATCH_DIR):
 | 
			
		||||
 | 
			
		||||
        self.logger = logging.getLogger(__name__)
 | 
			
		||||
        self.logging_group = None
 | 
			
		||||
 | 
			
		||||
        self.stats = {}
 | 
			
		||||
        self._ignore = []
 | 
			
		||||
        self.consume = consume
 | 
			
		||||
        self.scratch = scratch
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            os.makedirs(self.SCRATCH)
 | 
			
		||||
            os.makedirs(self.scratch)
 | 
			
		||||
        except FileExistsError:
 | 
			
		||||
            pass
 | 
			
		||||
 | 
			
		||||
        self.stats = {}
 | 
			
		||||
        self._ignore = []
 | 
			
		||||
 | 
			
		||||
        if not self.CONSUME:
 | 
			
		||||
        if not self.consume:
 | 
			
		||||
            raise ConsumerError(
 | 
			
		||||
                "The CONSUMPTION_DIR settings variable does not appear to be "
 | 
			
		||||
                "set."
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        if not os.path.exists(self.CONSUME):
 | 
			
		||||
        if not os.path.exists(self.consume):
 | 
			
		||||
            raise ConsumerError(
 | 
			
		||||
                "Consumption directory {} does not exist".format(self.CONSUME))
 | 
			
		||||
                "Consumption directory {} does not exist".format(self.consume))
 | 
			
		||||
 | 
			
		||||
        self.parsers = []
 | 
			
		||||
        for response in document_consumer_declaration.send(self):
 | 
			
		||||
@ -73,11 +73,11 @@ class Consumer:
 | 
			
		||||
            "group": self.logging_group
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
    def consume(self):
 | 
			
		||||
    def run(self):
 | 
			
		||||
 | 
			
		||||
        for doc in os.listdir(self.CONSUME):
 | 
			
		||||
        for doc in os.listdir(self.consume):
 | 
			
		||||
 | 
			
		||||
            doc = os.path.join(self.CONSUME, doc)
 | 
			
		||||
            doc = os.path.join(self.consume, doc)
 | 
			
		||||
 | 
			
		||||
            if not os.path.isfile(doc):
 | 
			
		||||
                continue
 | 
			
		||||
@ -226,8 +226,8 @@ class Consumer:
 | 
			
		||||
 | 
			
		||||
    def _is_ready(self, doc):
 | 
			
		||||
        """
 | 
			
		||||
        Detect whether `doc` is ready to consume or if it's still being written
 | 
			
		||||
        to by the uploader.
 | 
			
		||||
        Detect whether ``doc`` is ready to consume or if it's still being
 | 
			
		||||
        written to by the uploader.
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        t = os.stat(doc).st_mtime
 | 
			
		||||
 | 
			
		||||
@ -92,7 +92,7 @@ class UploadForm(forms.Form):
 | 
			
		||||
 | 
			
		||||
        t = int(mktime(datetime.now().timetuple()))
 | 
			
		||||
        file_name = os.path.join(
 | 
			
		||||
            Consumer.CONSUME,
 | 
			
		||||
            settings.CONSUMPTION_DIR,
 | 
			
		||||
            "{} - {}.{}".format(correspondent, title, self._file_type)
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -151,7 +151,7 @@ class Attachment(object):
 | 
			
		||||
 | 
			
		||||
class MailFetcher(Loggable):
 | 
			
		||||
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
    def __init__(self, consume=settings.CONSUMPTION_DIR):
 | 
			
		||||
 | 
			
		||||
        Loggable.__init__(self)
 | 
			
		||||
 | 
			
		||||
@ -165,6 +165,7 @@ class MailFetcher(Loggable):
 | 
			
		||||
        self._enabled = bool(self._host)
 | 
			
		||||
 | 
			
		||||
        self.last_checked = datetime.datetime.now()
 | 
			
		||||
        self.consume = consume
 | 
			
		||||
 | 
			
		||||
    def pull(self):
 | 
			
		||||
        """
 | 
			
		||||
@ -185,7 +186,7 @@ class MailFetcher(Loggable):
 | 
			
		||||
                self.log("info", 'Storing email: "{}"'.format(message.subject))
 | 
			
		||||
 | 
			
		||||
                t = int(time.mktime(message.time.timetuple()))
 | 
			
		||||
                file_name = os.path.join(Consumer.CONSUME, message.file_name)
 | 
			
		||||
                file_name = os.path.join(self.consume, message.file_name)
 | 
			
		||||
                with open(file_name, "wb") as f:
 | 
			
		||||
                    f.write(message.attachment.data)
 | 
			
		||||
                    os.utime(file_name, times=(t, t))
 | 
			
		||||
 | 
			
		||||
@ -16,9 +16,6 @@ class Command(BaseCommand):
 | 
			
		||||
    consumption directory, and fetch any mail available.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    LOOP_TIME = settings.CONSUMER_LOOP_TIME
 | 
			
		||||
    MAIL_DELTA = datetime.timedelta(minutes=10)
 | 
			
		||||
 | 
			
		||||
    ORIGINAL_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "originals")
 | 
			
		||||
    THUMB_DOCS = os.path.join(settings.MEDIA_ROOT, "documents", "thumbnails")
 | 
			
		||||
 | 
			
		||||
@ -32,13 +29,41 @@ class Command(BaseCommand):
 | 
			
		||||
 | 
			
		||||
        BaseCommand.__init__(self, *args, **kwargs)
 | 
			
		||||
 | 
			
		||||
    def add_arguments(self, parser):
 | 
			
		||||
        parser.add_argument(
 | 
			
		||||
            "directory",
 | 
			
		||||
            default=settings.CONSUMPTION_DIR,
 | 
			
		||||
            nargs="?",
 | 
			
		||||
            help="The consumption directory."
 | 
			
		||||
        )
 | 
			
		||||
        parser.add_argument(
 | 
			
		||||
            "--loop-time",
 | 
			
		||||
            default=settings.CONSUMER_LOOP_TIME,
 | 
			
		||||
            type=int,
 | 
			
		||||
            help="Wait time between each loop (in seconds)."
 | 
			
		||||
        )
 | 
			
		||||
        parser.add_argument(
 | 
			
		||||
            "--mail-delta",
 | 
			
		||||
            default=10,
 | 
			
		||||
            type=int,
 | 
			
		||||
            help="Wait time between each mail fetch (in minutes)."
 | 
			
		||||
        )
 | 
			
		||||
        parser.add_argument(
 | 
			
		||||
            "--oneshot",
 | 
			
		||||
            action="store_true",
 | 
			
		||||
            help="Run only once."
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def handle(self, *args, **options):
 | 
			
		||||
 | 
			
		||||
        self.verbosity = options["verbosity"]
 | 
			
		||||
        directory = options["directory"]
 | 
			
		||||
        loop_time = options["loop_time"]
 | 
			
		||||
        mail_delta = datetime.timedelta(minutes=options["mail_delta"])
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            self.file_consumer = Consumer()
 | 
			
		||||
            self.mail_fetcher = MailFetcher()
 | 
			
		||||
            self.file_consumer = Consumer(consume=directory)
 | 
			
		||||
            self.mail_fetcher = MailFetcher(consume=directory)
 | 
			
		||||
        except (ConsumerError, MailFetcherError) as e:
 | 
			
		||||
            raise CommandError(e)
 | 
			
		||||
 | 
			
		||||
@ -49,27 +74,32 @@ class Command(BaseCommand):
 | 
			
		||||
                pass
 | 
			
		||||
 | 
			
		||||
        logging.getLogger(__name__).info(
 | 
			
		||||
            "Starting document consumer at {}".format(settings.CONSUMPTION_DIR)
 | 
			
		||||
            "Starting document consumer at {}".format(directory)
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        if options["oneshot"]:
 | 
			
		||||
            self.loop(mail_delta=mail_delta)
 | 
			
		||||
        else:
 | 
			
		||||
            try:
 | 
			
		||||
                while True:
 | 
			
		||||
                self.loop()
 | 
			
		||||
                time.sleep(self.LOOP_TIME)
 | 
			
		||||
                    self.loop(mail_delta=mail_delta)
 | 
			
		||||
                    time.sleep(loop_time)
 | 
			
		||||
                    if self.verbosity > 1:
 | 
			
		||||
                    print(".")
 | 
			
		||||
                        print(".", int(time.time()))
 | 
			
		||||
            except KeyboardInterrupt:
 | 
			
		||||
                print("Exiting")
 | 
			
		||||
 | 
			
		||||
    def loop(self):
 | 
			
		||||
 | 
			
		||||
        # Consume whatever files we can
 | 
			
		||||
        self.file_consumer.consume()
 | 
			
		||||
    def loop(self, mail_delta):
 | 
			
		||||
 | 
			
		||||
        # Occasionally fetch mail and store it to be consumed on the next loop
 | 
			
		||||
        # We fetch email when we first start up so that it is not necessary to
 | 
			
		||||
        # wait for 10 minutes after making changes to the config file.
 | 
			
		||||
        delta = self.mail_fetcher.last_checked + self.MAIL_DELTA
 | 
			
		||||
        delta = self.mail_fetcher.last_checked + mail_delta
 | 
			
		||||
        if self.first_iteration or delta < datetime.datetime.now():
 | 
			
		||||
            self.first_iteration = False
 | 
			
		||||
            self.mail_fetcher.pull()
 | 
			
		||||
 | 
			
		||||
        # Consume whatever files we can.
 | 
			
		||||
        # We have to run twice as the first run checks for file readiness
 | 
			
		||||
        for i in range(2):
 | 
			
		||||
            self.file_consumer.run()
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
from django.test import TestCase
 | 
			
		||||
from unittest import mock
 | 
			
		||||
from tempfile import TemporaryDirectory
 | 
			
		||||
 | 
			
		||||
from ..consumer import Consumer
 | 
			
		||||
from ..models import FileInfo
 | 
			
		||||
@ -16,7 +17,6 @@ class TestConsumer(TestCase):
 | 
			
		||||
            self.DummyParser
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    @mock.patch("documents.consumer.Consumer.CONSUME")
 | 
			
		||||
    @mock.patch("documents.consumer.os.makedirs")
 | 
			
		||||
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
 | 
			
		||||
    @mock.patch("documents.consumer.document_consumer_declaration.send")
 | 
			
		||||
@ -32,18 +32,22 @@ class TestConsumer(TestCase):
 | 
			
		||||
            (None, lambda _: {"weight": 0, "parser": DummyParser1}),
 | 
			
		||||
            (None, lambda _: {"weight": 1, "parser": DummyParser2}),
 | 
			
		||||
        )
 | 
			
		||||
        with TemporaryDirectory() as tmpdir:
 | 
			
		||||
            self.assertEqual(
 | 
			
		||||
                Consumer(consume=tmpdir)._get_parser_class("doc.pdf"),
 | 
			
		||||
                DummyParser2
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        self.assertEqual(Consumer()._get_parser_class("doc.pdf"), DummyParser2)
 | 
			
		||||
 | 
			
		||||
    @mock.patch("documents.consumer.Consumer.CONSUME")
 | 
			
		||||
    @mock.patch("documents.consumer.os.makedirs")
 | 
			
		||||
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
 | 
			
		||||
    @mock.patch("documents.consumer.document_consumer_declaration.send")
 | 
			
		||||
    def test__get_parser_class_0_parsers(self, m, *args):
 | 
			
		||||
        m.return_value = ((None, lambda _: None),)
 | 
			
		||||
        self.assertIsNone(Consumer()._get_parser_class("doc.pdf"))
 | 
			
		||||
        with TemporaryDirectory() as tmpdir:
 | 
			
		||||
            self.assertIsNone(
 | 
			
		||||
                Consumer(consume=tmpdir)._get_parser_class("doc.pdf")
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
    @mock.patch("documents.consumer.Consumer.CONSUME")
 | 
			
		||||
    @mock.patch("documents.consumer.os.makedirs")
 | 
			
		||||
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
 | 
			
		||||
    @mock.patch("documents.consumer.document_consumer_declaration.send")
 | 
			
		||||
@ -51,7 +55,8 @@ class TestConsumer(TestCase):
 | 
			
		||||
        m.return_value = (
 | 
			
		||||
            (None, lambda _: {"weight": 0, "parser": self.DummyParser}),
 | 
			
		||||
        )
 | 
			
		||||
        return Consumer()
 | 
			
		||||
        with TemporaryDirectory() as tmpdir:
 | 
			
		||||
            return Consumer(consume=tmpdir)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestAttributes(TestCase):
 | 
			
		||||
 | 
			
		||||
@ -6,7 +6,6 @@ exclude = migrations, paperless/settings.py, .tox
 | 
			
		||||
DJANGO_SETTINGS_MODULE=paperless.settings
 | 
			
		||||
addopts = --pythonwarnings=all -n auto
 | 
			
		||||
env =
 | 
			
		||||
  PAPERLESS_CONSUME=/tmp
 | 
			
		||||
  PAPERLESS_PASSPHRASE=THISISNOTASECRET
 | 
			
		||||
  PAPERLESS_SECRET=paperless
 | 
			
		||||
  PAPERLESS_EMAIL_SECRET=paperless
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user