mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-25 15:52:35 -04:00 
			
		
		
		
	s/Sender/Correspondent & reworked the (im|ex)porter
This commit is contained in:
		
							parent
							
								
									fad466477b
								
							
						
					
					
						commit
						070463b85a
					
				| @ -44,10 +44,10 @@ Any document you put into the consumption directory will be consumed, but if you | |||||||
| name the file right, it'll automatically set some values in the database for | name the file right, it'll automatically set some values in the database for | ||||||
| you.  This is the logic the consumer follows: | you.  This is the logic the consumer follows: | ||||||
| 
 | 
 | ||||||
| 1. Try to find the sender, title, and tags in the file name following the | 1. Try to find the correspondent, title, and tags in the file name following | ||||||
|    pattern: ``Sender - Title - tag,tag,tag.pdf``. |    the pattern: ``Correspondent - Title - tag,tag,tag.pdf``. | ||||||
| 2. If that doesn't work, try to find the sender and title in the file name | 2. If that doesn't work, try to find the correspondent and title in the file | ||||||
|    following the pattern:  ``Sender - Title.pdf``. |    name following the pattern:  ``Correspondent - Title.pdf``. | ||||||
| 3. If that doesn't work, just assume that the name of the file is the title. | 3. If that doesn't work, just assume that the name of the file is the title. | ||||||
| 
 | 
 | ||||||
| So given the above, the following examples would work as you'd expect: | So given the above, the following examples would work as you'd expect: | ||||||
| @ -97,9 +97,9 @@ So, with all that in mind, here's what you do to get it running: | |||||||
|    the configured email account every 10 minutes for something new and pull down |    the configured email account every 10 minutes for something new and pull down | ||||||
|    whatever it finds. |    whatever it finds. | ||||||
| 4. Send yourself an email!  Note that the subject is treated as the file name, | 4. Send yourself an email!  Note that the subject is treated as the file name, | ||||||
|    so if you set the subject to ``Sender - Title - tag,tag,tag``, you'll get |    so if you set the subject to ``Correspondent - Title - tag,tag,tag``, you'll | ||||||
|    what you expect.  Also, you must include the aforementioned secret string in |    get what you expect.  Also, you must include the aforementioned secret | ||||||
|    every email so the fetcher knows that it's safe to import. |    string in every email so the fetcher knows that it's safe to import. | ||||||
| 5. After a few minutes, the consumer will poll your mailbox, pull down the | 5. After a few minutes, the consumer will poll your mailbox, pull down the | ||||||
|    message, and place the attachment in the consumption directory with the |    message, and place the attachment in the consumption directory with the | ||||||
|    appropriate name.  A few minutes later, the consumer will import it like any |    appropriate name.  A few minutes later, the consumer will import it like any | ||||||
| @ -118,16 +118,16 @@ a real API, it's just a URL that accepts an HTTP POST. | |||||||
| To push your document to *Paperless*, send an HTTP POST to the server with the | To push your document to *Paperless*, send an HTTP POST to the server with the | ||||||
| following name/value pairs: | following name/value pairs: | ||||||
| 
 | 
 | ||||||
| * ``sender``: The name of the document's sender.  Note that there are | * ``correspondent``: The name of the document's correspondent.  Note that there | ||||||
|   restrictions on what characters you can use here.  Specifically, alphanumeric |   are restrictions on what characters you can use here.  Specifically, | ||||||
|   characters, `-`, `,`, `.`, and `'` are ok, everything else is out.  You also |   alphanumeric characters, `-`, `,`, `.`, and `'` are ok, everything else is | ||||||
|   can't use the sequence ` - ` (space, dash, space). |   out.  You also can't use the sequence ` - ` (space, dash, space). | ||||||
| * ``title``: The title of the document.  The rules for characters are the same | * ``title``: The title of the document.  The rules for characters are the same | ||||||
|   here as the sender. |   here as the correspondent. | ||||||
| * ``signature``: For security reasons, we have the sender send a signature using | * ``signature``: For security reasons, we have the correspondent send a | ||||||
|   a "shared secret" method to make sure that random strangers don't start |   signature using a "shared secret" method to make sure that random strangers | ||||||
|   uploading stuff to your server.  The means of generating this signature is |   don't start uploading stuff to your server.  The means of generating this | ||||||
|   defined below. |   signature is defined below. | ||||||
| 
 | 
 | ||||||
| Specify ``enctype="multipart/form-data"``, and then POST your file with::: | Specify ``enctype="multipart/form-data"``, and then POST your file with::: | ||||||
| 
 | 
 | ||||||
| @ -146,12 +146,12 @@ verification. | |||||||
| 
 | 
 | ||||||
| In the case of *Paperless*, you configure the server with the secret by setting | In the case of *Paperless*, you configure the server with the secret by setting | ||||||
| ``UPLOAD_SHARED_SECRET``.  Then on your client, you generate your signature by | ``UPLOAD_SHARED_SECRET``.  Then on your client, you generate your signature by | ||||||
| concatenating the sender, title, and the secret, and then using sha256 to | concatenating the correspondent, title, and the secret, and then using sha256 | ||||||
| generate a hexdigest. | to generate a hexdigest. | ||||||
| 
 | 
 | ||||||
| If you're using Python, this is what that looks like: | If you're using Python, this is what that looks like: | ||||||
| 
 | 
 | ||||||
| .. code:: python | .. code:: python | ||||||
| 
 | 
 | ||||||
|     from hashlib import sha256 |     from hashlib import sha256 | ||||||
|     signature = sha256(sender + title + secret).hexdigest() |     signature = sha256(correspondent + title + secret).hexdigest() | ||||||
|  | |||||||
| @ -4,10 +4,68 @@ Migrating, Updates, and Backups | |||||||
| =============================== | =============================== | ||||||
| 
 | 
 | ||||||
| As *Paperless* is still under active development, there's a lot that can change | As *Paperless* is still under active development, there's a lot that can change | ||||||
| as software updates roll out.  The thing you just need to remember for all of | as software updates roll out.  You should back up often, so if anything goes | ||||||
| this is that for the most part, **the database is expendable** so long as you | wrong during an update, you at least have a means of restoring to something | ||||||
| have your files.  This is because the file name of the exported files includes | usable.  Thankfully, there are automated ways of backing up, restoring, and | ||||||
| the name of the sender, the title, and the tags (if any) on each file. | updating the software. | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | .. _migrating-backup: | ||||||
|  | 
 | ||||||
|  | Backing Up | ||||||
|  | ---------- | ||||||
|  | 
 | ||||||
|  | So you're bored of this whole project, or you want to make a remote backup of | ||||||
|  | the unencrypted files for whatever reason.  This is easy to do, simply use the | ||||||
|  | :ref:`exporter <utilities-exporter>` to dump your documents and database out | ||||||
|  | into an arbitrary directory. | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | .. _migrating-restoring: | ||||||
|  | 
 | ||||||
|  | Restoring | ||||||
|  | --------- | ||||||
|  | 
 | ||||||
|  | Restoring your data is just as easy, since nearly all of your data exists either | ||||||
|  | in the file names, or in the contents of the files themselves.  You just need to | ||||||
|  | create an empty database (just follow the | ||||||
|  | :ref:`installation instructions <setup-installation>` again) and then import the | ||||||
|  | ``tags.json`` file you created as part of your backup.  Lastly, copy your | ||||||
|  | exported documents into the consumption directory and start up the consumer. | ||||||
|  | 
 | ||||||
|  | .. code-block:: shell-session | ||||||
|  | 
 | ||||||
|  |     $ cd /path/to/project | ||||||
|  |     $ rm data/db.sqlite3  # Delete the database | ||||||
|  |     $ cd src | ||||||
|  |     $ ./manage.py migrate  # Create the database | ||||||
|  |     $ ./manage.py createsuperuser | ||||||
|  |     $ ./manage.py loaddata /path/to/arbitrary/place/tags.json | ||||||
|  |     $ cp /path/to/exported/docs/* /path/to/consumption/dir/ | ||||||
|  |     $ ./manage.py document_consumer | ||||||
|  | 
 | ||||||
|  | Importing your data if you are :ref:`using Docker <setup-installation-docker>` | ||||||
|  | is almost as simple: | ||||||
|  | 
 | ||||||
|  | .. code-block:: shell-session | ||||||
|  | 
 | ||||||
|  |     # Stop and remove your current containers | ||||||
|  |     $ docker-compose stop | ||||||
|  |     $ docker-compose rm -f | ||||||
|  | 
 | ||||||
|  |     # Recreate them, add the superuser | ||||||
|  |     $ docker-compose up -d | ||||||
|  |     $ docker-compose run --rm webserver createsuperuser | ||||||
|  | 
 | ||||||
|  |     # Load the tags | ||||||
|  |     $ cat /path/to/arbitrary/place/tags.json | docker-compose run --rm webserver loaddata_stdin - | ||||||
|  | 
 | ||||||
|  |     # Load your exported documents into the consumption directory | ||||||
|  |     # (How you do this highly depends on how you have set this up) | ||||||
|  |     $ cp /path/to/exported/docs/* /path/to/mounted/consumption/dir/ | ||||||
|  | 
 | ||||||
|  | After loading the documents into the consumption directory the consumer will | ||||||
|  | immediately start consuming the documents. | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| .. _migrating-updates: | .. _migrating-updates: | ||||||
| @ -20,7 +78,7 @@ on the directory containing the project files, and then use Django's ``migrate`` | |||||||
| command to execute any database schema updates that might have been rolled in | command to execute any database schema updates that might have been rolled in | ||||||
| as part of the update: | as part of the update: | ||||||
| 
 | 
 | ||||||
| .. code:: bash | .. code-block:: shell-session | ||||||
| 
 | 
 | ||||||
|     $ cd /path/to/project |     $ cd /path/to/project | ||||||
|     $ git pull |     $ git pull | ||||||
| @ -43,112 +101,3 @@ requires only one additional step: | |||||||
| 
 | 
 | ||||||
| If ``git pull`` doesn't report any changes, there is no need to continue with | If ``git pull`` doesn't report any changes, there is no need to continue with | ||||||
| the remaining steps. | the remaining steps. | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| .. _migrating-backup: |  | ||||||
| 
 |  | ||||||
| Backing Up |  | ||||||
| ---------- |  | ||||||
| 
 |  | ||||||
| So you're bored of this whole project, or you want to make a remote backup of |  | ||||||
| the unencrypted files for whatever reason.  This is easy to do, simply use the |  | ||||||
| :ref:`exporter <utilities-exporter>` to dump your documents out into an |  | ||||||
| arbitrary directory. |  | ||||||
| 
 |  | ||||||
| Additionally however, you'll need to back up the tags themselves.  The file |  | ||||||
| names contain the tag names, but you still need to define the tags and their |  | ||||||
| matching algorithms in the database for things to work properly.  We do this |  | ||||||
| with Django's ``dumpdata`` command, which produces JSON output. |  | ||||||
| 
 |  | ||||||
| .. code:: bash |  | ||||||
| 
 |  | ||||||
|     $ cd /path/to/project |  | ||||||
|     $ cd src |  | ||||||
|     $ ./manage.py document_export /path/to/arbitrary/place/ |  | ||||||
|     $ ./manage.py dumpdata documents.Tag > /path/to/arbitrary/place/tags.json |  | ||||||
| 
 |  | ||||||
| If you are :ref:`using Docker <setup-installation-docker>`, exporting your tags |  | ||||||
| as JSON is almost as easy: |  | ||||||
| 
 |  | ||||||
| .. code-block:: shell-session |  | ||||||
| 
 |  | ||||||
|     $ docker-compose run --rm webserver dumpdata documents.Tag > /path/to/arbitrary/place/tags.json |  | ||||||
| 
 |  | ||||||
| To export the documents you can either use ``docker run`` directly, specifying all |  | ||||||
| the commandline options by hand, or (more simply) mount a second volume for export. |  | ||||||
| 
 |  | ||||||
| To mount a volume for exports, follow the instructions in the |  | ||||||
| ``docker-compose.yml.example`` file for the ``/export`` volume (making the changes |  | ||||||
| in your own ``docker-compose.yml`` file, of course). Once you have the |  | ||||||
| volume mounted, the command to run an export is: |  | ||||||
| 
 |  | ||||||
| .. code-block:: console |  | ||||||
| 
 |  | ||||||
|    $ docker-compose run --rm consumer document_exporter /export |  | ||||||
| 
 |  | ||||||
| If you prefer to use ``docker run`` directly, supplying the necessary commandline |  | ||||||
| options: |  | ||||||
| 
 |  | ||||||
| .. code-block:: shell-session |  | ||||||
| 
 |  | ||||||
|    $ # Identify your containers |  | ||||||
|    $ docker-compose ps |  | ||||||
|            Name                       Command                State     Ports |  | ||||||
|    ------------------------------------------------------------------------- |  | ||||||
|    paperless_consumer_1    /sbin/docker-entrypoint.sh ...   Exit 0 |  | ||||||
|    paperless_webserver_1   /sbin/docker-entrypoint.sh ...   Exit 0 |  | ||||||
| 
 |  | ||||||
|    $ # Make sure to replace your passphrase and remove or adapt the id mapping |  | ||||||
|    $ docker run --rm \ |  | ||||||
|        --volumes-from paperless_data_1 \ |  | ||||||
|        --volume /path/to/arbitrary/place:/export \ |  | ||||||
|        -e PAPERLESS_PASSPHRASE=YOUR_PASSPHRASE \ |  | ||||||
|        -e USERMAP_UID=1000 -e USERMAP_GID=1000 \ |  | ||||||
|        paperless document_exporter /export |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| .. _migrating-restoring: |  | ||||||
| 
 |  | ||||||
| Restoring |  | ||||||
| --------- |  | ||||||
| 
 |  | ||||||
| Restoring your data is just as easy, since nearly all of your data exists either |  | ||||||
| in the file names, or in the contents of the files themselves.  You just need to |  | ||||||
| create an empty database (just follow the |  | ||||||
| :ref:`installation instructions <setup-installation>` again) and then import the |  | ||||||
| ``tags.json`` file you created as part of your backup.  Lastly, copy your |  | ||||||
| exported documents into the consumption directory and start up the consumer. |  | ||||||
| 
 |  | ||||||
| .. code:: bash |  | ||||||
| 
 |  | ||||||
|     $ cd /path/to/project |  | ||||||
|     $ rm data/db.sqlite3  # Delete the database |  | ||||||
|     $ cd src |  | ||||||
|     $ ./manage.py migrate  # Create the database |  | ||||||
|     $ ./manage.py createsuperuser |  | ||||||
|     $ ./manage.py loaddata /path/to/arbitrary/place/tags.json |  | ||||||
|     $ cp /path/to/exported/docs/* /path/to/consumption/dir/ |  | ||||||
|     $ ./manage.py document_consumer |  | ||||||
| 
 |  | ||||||
| Importing your data if you are :ref:`using Docker <setup-installation-docker>` |  | ||||||
| is almost as simple: |  | ||||||
| 
 |  | ||||||
| .. code-block:: shell-session |  | ||||||
| 
 |  | ||||||
|     $ # Stop and remove your current containers |  | ||||||
|     $ docker-compose stop |  | ||||||
|     $ docker-compose rm -f |  | ||||||
| 
 |  | ||||||
|     $ # Recreate them, add the superuser |  | ||||||
|     $ docker-compose up -d |  | ||||||
|     $ docker-compose run --rm webserver createsuperuser |  | ||||||
| 
 |  | ||||||
|     $ # Load the tags |  | ||||||
|     $ cat /path/to/arbitrary/place/tags.json | docker-compose run --rm webserver loaddata_stdin - |  | ||||||
| 
 |  | ||||||
|     $ # Load your exported documents into the consumption directory |  | ||||||
|     $ # (How you do this highly depends on how you have set this up) |  | ||||||
|     $ cp /path/to/exported/docs/* /path/to/mounted/consumption/dir/ |  | ||||||
| 
 |  | ||||||
| After loading the documents into the consumption directory the consumer will |  | ||||||
| immediately start consuming the documents. |  | ||||||
|  | |||||||
| @ -26,7 +26,7 @@ How to Use It | |||||||
| 
 | 
 | ||||||
| The webserver is started via the ``manage.py`` script: | The webserver is started via the ``manage.py`` script: | ||||||
| 
 | 
 | ||||||
| .. code:: bash | .. code-block:: shell-session | ||||||
| 
 | 
 | ||||||
|     $ /path/to/paperless/src/manage.py runserver |     $ /path/to/paperless/src/manage.py runserver | ||||||
| 
 | 
 | ||||||
| @ -64,7 +64,7 @@ How to Use It | |||||||
| 
 | 
 | ||||||
| The consumer is started via the ``manage.py`` script: | The consumer is started via the ``manage.py`` script: | ||||||
| 
 | 
 | ||||||
| .. code:: bash | .. code-block:: shell-session | ||||||
| 
 | 
 | ||||||
|     $ /path/to/paperless/src/manage.py document_consumer |     $ /path/to/paperless/src/manage.py document_consumer | ||||||
| 
 | 
 | ||||||
| @ -95,16 +95,86 @@ How to Use It | |||||||
| 
 | 
 | ||||||
| This too is done via the ``manage.py`` script: | This too is done via the ``manage.py`` script: | ||||||
| 
 | 
 | ||||||
| .. code:: bash | .. code-block:: shell-session | ||||||
| 
 | 
 | ||||||
|     $ /path/to/paperless/src/manage.py document_exporter /path/to/somewhere |     $ /path/to/paperless/src/manage.py document_exporter /path/to/somewhere/ | ||||||
| 
 | 
 | ||||||
| This will dump all of your PDFs into ``/path/to/somewhere`` for you to do with | This will dump all of your unencrypted PDFs into ``/path/to/somewhere`` for you | ||||||
| as you please.  The naming scheme on export is identical to that used for | to do with as you please.  The files are accompanied with a special file, | ||||||
| import, so should you can now safely delete the entire project directly, | ``manifest.json`` which can be used to | ||||||
| database, encrypted PDFs and all, and later create it all again simply by | :ref:`import the files <utilities-importer>` at a later date if you wish. | ||||||
| running the consumer again and dumping all of these files into | 
 | ||||||
| ``CONSUMPTION_DIR``. | 
 | ||||||
|  | .. _utilities-exporter-howto-docker: | ||||||
|  | 
 | ||||||
|  | Docker | ||||||
|  | ______ | ||||||
|  | 
 | ||||||
|  | If you are :ref:`using Docker <setup-installation-docker>`, running the | ||||||
|  | exporter is almost as easy.  To mount a volume for exports, follow the | ||||||
|  | instructions in the ``docker-compose.yml.example`` file for the ``/export`` | ||||||
|  | volume (making the changes in your own ``docker-compose.yml`` file, of course). | ||||||
|  | Once you have the volume mounted, the command to run an export is: | ||||||
|  | 
 | ||||||
|  | .. code-block:: shell-session | ||||||
|  | 
 | ||||||
|  |    $ docker-compose run --rm consumer document_exporter /export | ||||||
|  | 
 | ||||||
|  | If you prefer to use ``docker run`` directly, supplying the necessary commandline | ||||||
|  | options: | ||||||
|  | 
 | ||||||
|  | .. code-block:: shell-session | ||||||
|  | 
 | ||||||
|  |    $ # Identify your containers | ||||||
|  |    $ docker-compose ps | ||||||
|  |            Name                       Command                State     Ports | ||||||
|  |    ------------------------------------------------------------------------- | ||||||
|  |    paperless_consumer_1    /sbin/docker-entrypoint.sh ...   Exit 0 | ||||||
|  |    paperless_webserver_1   /sbin/docker-entrypoint.sh ...   Exit 0 | ||||||
|  | 
 | ||||||
|  |    $ # Make sure to replace your passphrase and remove or adapt the id mapping | ||||||
|  |    $ docker run --rm \ | ||||||
|  |        --volumes-from paperless_data_1 \ | ||||||
|  |        --volume /path/to/arbitrary/place:/export \ | ||||||
|  |        -e PAPERLESS_PASSPHRASE=YOUR_PASSPHRASE \ | ||||||
|  |        -e USERMAP_UID=1000 -e USERMAP_GID=1000 \ | ||||||
|  |        paperless document_exporter /export | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | .. _utilities-importer: | ||||||
|  | 
 | ||||||
|  | The Importer | ||||||
|  | ------------ | ||||||
|  | 
 | ||||||
|  | Looking to transfer Paperless data from one instance to another, or just want | ||||||
|  | to restore from a backup?  This is your go-to toy. | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | .. _utilities-importer-howto: | ||||||
|  | 
 | ||||||
|  | How to Use It | ||||||
|  | ............. | ||||||
|  | 
 | ||||||
|  | The importer works just like the exporter.  You point it at a directory, and | ||||||
|  | the script does the rest of the work: | ||||||
|  | 
 | ||||||
|  | .. code-block:: shell-session | ||||||
|  | 
 | ||||||
|  |     $ /path/to/paperless/src/manage.py document_importer /path/to/somewhere/ | ||||||
|  | 
 | ||||||
|  | Docker | ||||||
|  | ______ | ||||||
|  | 
 | ||||||
|  | Assuming that you've already gone through the steps above in the | ||||||
|  | :ref:`export <utilities-exporter-howto-docker>` section, then the easiest thing | ||||||
|  | to do is just re-use the ``/export`` path you already set up: | ||||||
|  | 
 | ||||||
|  | .. code-block:: shell-session | ||||||
|  | 
 | ||||||
|  |    $ docker-compose run --rm consumer document_importer /export | ||||||
|  | 
 | ||||||
|  | Similarly, if you're not using docker-compose, you can adjust the export | ||||||
|  | instructions above to do the import. | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| .. _utilities-retagger: | .. _utilities-retagger: | ||||||
|  | |||||||
| @ -3,7 +3,7 @@ from django.contrib.auth.models import User, Group | |||||||
| from django.core.urlresolvers import reverse | from django.core.urlresolvers import reverse | ||||||
| from django.templatetags.static import static | from django.templatetags.static import static | ||||||
| 
 | 
 | ||||||
| from .models import Sender, Tag, Document, Log | from .models import Correspondent, Tag, Document, Log | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class MonthListFilter(admin.SimpleListFilter): | class MonthListFilter(admin.SimpleListFilter): | ||||||
| @ -107,7 +107,7 @@ class LogAdmin(admin.ModelAdmin): | |||||||
|     list_filter = ("level", "component",) |     list_filter = ("level", "component",) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| admin.site.register(Sender) | admin.site.register(Correspondent) | ||||||
| admin.site.register(Tag, TagAdmin) | admin.site.register(Tag, TagAdmin) | ||||||
| admin.site.register(Document, DocumentAdmin) | admin.site.register(Document, DocumentAdmin) | ||||||
| admin.site.register(Log, LogAdmin) | admin.site.register(Log, LogAdmin) | ||||||
|  | |||||||
| @ -24,7 +24,7 @@ from pyocr.tesseract import TesseractError | |||||||
| 
 | 
 | ||||||
| from paperless.db import GnuPG | from paperless.db import GnuPG | ||||||
| 
 | 
 | ||||||
| from .models import Sender, Tag, Document, Log | from .models import Correspondent, Tag, Document, Log | ||||||
| from .languages import ISO639 | from .languages import ISO639 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -246,7 +246,7 @@ class Consumer(object): | |||||||
|         """ |         """ | ||||||
| 
 | 
 | ||||||
|         def get_sender(sender_name): |         def get_sender(sender_name): | ||||||
|             return Sender.objects.get_or_create( |             return Correspondent.objects.get_or_create( | ||||||
|                 name=sender_name, defaults={"slug": slugify(sender_name)})[0] |                 name=sender_name, defaults={"slug": slugify(sender_name)})[0] | ||||||
| 
 | 
 | ||||||
|         def get_tags(tags): |         def get_tags(tags): | ||||||
|  | |||||||
| @ -8,7 +8,7 @@ from time import mktime | |||||||
| from django import forms | from django import forms | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| 
 | 
 | ||||||
| from .models import Document, Sender | from .models import Document, Correspondent | ||||||
| from .consumer import Consumer | from .consumer import Consumer | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| @ -24,7 +24,9 @@ class UploadForm(forms.Form): | |||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     sender = forms.CharField( |     sender = forms.CharField( | ||||||
|         max_length=Sender._meta.get_field("name").max_length, required=False) |         max_length=Correspondent._meta.get_field("name").max_length, | ||||||
|  |         required=False | ||||||
|  |     ) | ||||||
|     title = forms.CharField( |     title = forms.CharField( | ||||||
|         max_length=Document._meta.get_field("title").max_length, |         max_length=Document._meta.get_field("title").max_length, | ||||||
|         required=False |         required=False | ||||||
| @ -41,7 +43,7 @@ class UploadForm(forms.Form): | |||||||
|         sender = self.cleaned_data.get("sender") |         sender = self.cleaned_data.get("sender") | ||||||
|         if not sender: |         if not sender: | ||||||
|             return None |             return None | ||||||
|         if not Sender.SAFE_REGEX.match(sender) or " - " in sender: |         if not Correspondent.SAFE_REGEX.match(sender) or " - " in sender: | ||||||
|             raise forms.ValidationError("That sender name is suspicious.") |             raise forms.ValidationError("That sender name is suspicious.") | ||||||
|         return sender |         return sender | ||||||
| 
 | 
 | ||||||
| @ -49,7 +51,7 @@ class UploadForm(forms.Form): | |||||||
|         title = self.cleaned_data.get("title") |         title = self.cleaned_data.get("title") | ||||||
|         if not title: |         if not title: | ||||||
|             return None |             return None | ||||||
|         if not Sender.SAFE_REGEX.match(title) or " - " in title: |         if not Correspondent.SAFE_REGEX.match(title) or " - " in title: | ||||||
|             raise forms.ValidationError("That title is suspicious.") |             raise forms.ValidationError("That title is suspicious.") | ||||||
| 
 | 
 | ||||||
|     def clean_document(self): |     def clean_document(self): | ||||||
|  | |||||||
| @ -14,7 +14,7 @@ from dateutil import parser | |||||||
| from django.conf import settings | from django.conf import settings | ||||||
| 
 | 
 | ||||||
| from .consumer import Consumer | from .consumer import Consumer | ||||||
| from .models import Sender, Log | from .models import Correspondent, Log | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class MailFetcherError(Exception): | class MailFetcherError(Exception): | ||||||
| @ -103,7 +103,7 @@ class Message(Loggable): | |||||||
|     def check_subject(self): |     def check_subject(self): | ||||||
|         if self.subject is None: |         if self.subject is None: | ||||||
|             raise InvalidMessageError("Message does not have a subject") |             raise InvalidMessageError("Message does not have a subject") | ||||||
|         if not Sender.SAFE_REGEX.match(self.subject): |         if not Correspondent.SAFE_REGEX.match(self.subject): | ||||||
|             raise InvalidMessageError("Message subject is unsafe: {}".format( |             raise InvalidMessageError("Message subject is unsafe: {}".format( | ||||||
|                 self.subject)) |                 self.subject)) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1,10 +1,12 @@ | |||||||
|  | import json | ||||||
| import os | import os | ||||||
| import time | import time | ||||||
| 
 | 
 | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| from django.core.management.base import BaseCommand, CommandError | from django.core.management.base import BaseCommand, CommandError | ||||||
|  | from django.core import serializers | ||||||
| 
 | 
 | ||||||
| from documents.models import Document | from documents.models import Document, Correspondent, Tag | ||||||
| from paperless.db import GnuPG | from paperless.db import GnuPG | ||||||
| 
 | 
 | ||||||
| from ...mixins import Renderable | from ...mixins import Renderable | ||||||
| @ -14,21 +16,19 @@ class Command(Renderable, BaseCommand): | |||||||
| 
 | 
 | ||||||
|     help = """ |     help = """ | ||||||
|         Decrypt and rename all files in our collection into a given target |         Decrypt and rename all files in our collection into a given target | ||||||
|         directory.  Note that we don't export any of the parsed data since |         directory.  And include a manifest file containing document data for | ||||||
|         that can always be re-collected via the consumer. |         easy import. | ||||||
|     """.replace("    ", "") |     """.replace("    ", "") | ||||||
| 
 | 
 | ||||||
|     def add_arguments(self, parser): |     def add_arguments(self, parser): | ||||||
|         parser.add_argument("target") |         parser.add_argument("target") | ||||||
| 
 | 
 | ||||||
|     def __init__(self, *args, **kwargs): |     def __init__(self, *args, **kwargs): | ||||||
|         self.verbosity = 0 |  | ||||||
|         self.target = None |  | ||||||
|         BaseCommand.__init__(self, *args, **kwargs) |         BaseCommand.__init__(self, *args, **kwargs) | ||||||
|  |         self.target = None | ||||||
| 
 | 
 | ||||||
|     def handle(self, *args, **options): |     def handle(self, *args, **options): | ||||||
| 
 | 
 | ||||||
|         self.verbosity = options["verbosity"] |  | ||||||
|         self.target = options["target"] |         self.target = options["target"] | ||||||
| 
 | 
 | ||||||
|         if not os.path.exists(self.target): |         if not os.path.exists(self.target): | ||||||
| @ -40,9 +40,15 @@ class Command(Renderable, BaseCommand): | |||||||
|         if not settings.PASSPHRASE: |         if not settings.PASSPHRASE: | ||||||
|             settings.PASSPHRASE = input("Please enter the passphrase: ") |             settings.PASSPHRASE = input("Please enter the passphrase: ") | ||||||
| 
 | 
 | ||||||
|         for document in Document.objects.all(): |         documents = Document.objects.all() | ||||||
|  |         document_map = {d.pk: d for d in documents} | ||||||
|  |         manifest = json.loads(serializers.serialize("json", documents)) | ||||||
|  |         for document_dict in manifest: | ||||||
|  | 
 | ||||||
|  |             document = document_map[document_dict["pk"]] | ||||||
| 
 | 
 | ||||||
|             target = os.path.join(self.target, document.file_name) |             target = os.path.join(self.target, document.file_name) | ||||||
|  |             document_dict["__exported_file_name__"] = target | ||||||
| 
 | 
 | ||||||
|             print("Exporting: {}".format(target)) |             print("Exporting: {}".format(target)) | ||||||
| 
 | 
 | ||||||
| @ -50,3 +56,12 @@ class Command(Renderable, BaseCommand): | |||||||
|                 f.write(GnuPG.decrypted(document.source_file)) |                 f.write(GnuPG.decrypted(document.source_file)) | ||||||
|                 t = int(time.mktime(document.created.timetuple())) |                 t = int(time.mktime(document.created.timetuple())) | ||||||
|                 os.utime(target, times=(t, t)) |                 os.utime(target, times=(t, t)) | ||||||
|  | 
 | ||||||
|  |         manifest += json.loads( | ||||||
|  |             serializers.serialize("json", Correspondent.objects.all())) | ||||||
|  | 
 | ||||||
|  |         manifest += json.loads(serializers.serialize( | ||||||
|  |             "json", Tag.objects.all())) | ||||||
|  | 
 | ||||||
|  |         with open(os.path.join(self.target, "manifest.json"), "w") as f: | ||||||
|  |             json.dump(manifest, f, indent=2) | ||||||
|  | |||||||
							
								
								
									
										110
									
								
								src/documents/management/commands/document_importer.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								src/documents/management/commands/document_importer.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,110 @@ | |||||||
|  | import json | ||||||
|  | import os | ||||||
|  | 
 | ||||||
|  | from django.conf import settings | ||||||
|  | from django.core.management.base import BaseCommand, CommandError | ||||||
|  | from django.core.management import call_command | ||||||
|  | 
 | ||||||
|  | from documents.models import Document | ||||||
|  | from paperless.db import GnuPG | ||||||
|  | 
 | ||||||
|  | from ...mixins import Renderable | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class Command(Renderable, BaseCommand):
    """Import documents described by a ``manifest.json`` in a directory.

    The manifest is loaded into the database with ``loaddata``; afterwards
    every ``documents.document`` record has the plain file named by its
    ``__exported_file_name__`` key encrypted and written to the document's
    ``source_path``.
    """

    help = """
        Using a manifest.json file, load the data from there, and import the
        documents it refers to.
    """.replace("    ", "")

    def add_arguments(self, parser):
        """Register the positional ``source`` directory and the flags."""
        parser.add_argument("source")
        parser.add_argument(
            '--ignore-absent',
            action='store_true',
            default=False,
            help="If the manifest refers to a document that doesn't exist, "
                 "ignore it and attempt to import what it can"
        )

    def __init__(self, *args, **kwargs):
        BaseCommand.__init__(self, *args, **kwargs)
        self.source = None
        self.manifest = None
        # Mirrors --ignore-absent; set for real in handle().
        self.ignore_absent = False

    def handle(self, *args, **options):
        """Validate the source directory and manifest, then run the import.

        Raises:
            CommandError: if the source path is missing or unreadable, the
                manifest is missing, the manifest refers to absent files
                (unless ``--ignore-absent`` was given), or no passphrase is
                configured.
        """

        self.source = options["source"]
        self.ignore_absent = bool(options.get("ignore_absent", False))

        if not os.path.exists(self.source):
            raise CommandError("That path doesn't exist")

        if not os.access(self.source, os.R_OK):
            raise CommandError("That path doesn't appear to be readable")

        manifest_path = os.path.join(self.source, "manifest.json")
        self._check_manifest_exists(manifest_path)

        with open(manifest_path) as f:
            self.manifest = json.load(f)

        self._check_manifest()

        if not settings.PASSPHRASE:
            raise CommandError(
                "You need to define a passphrase before continuing.  Please "
                "consult the documentation for setting up Paperless."
            )

        # Fill up the database with whatever is in the manifest.  Note that
        # the whole manifest is handed to loaddata, so with --ignore-absent
        # the database rows of absent documents are still created; only
        # their files are skipped.
        call_command("loaddata", manifest_path)

        self._import_files_from_manifest()

    @staticmethod
    def _check_manifest_exists(path):
        """Raise ``CommandError`` unless ``path`` exists."""
        if not os.path.exists(path):
            raise CommandError(
                "That directory doesn't appear to contain a manifest.json "
                "file."
            )

    def _exported_path(self, record):
        """Return the on-disk path of a record's exported file, or ``None``.

        ``None`` means the record carries no ``__exported_file_name__`` key.
        The name is resolved against ``self.source`` so that validation and
        import always look at the same location (``os.path.join`` leaves an
        absolute name untouched).
        """
        doc_file = record.get("__exported_file_name__")
        if doc_file is None:
            return None
        return os.path.join(self.source, doc_file)

    def _check_manifest(self):
        """Ensure every document record refers to a file that exists.

        With ``--ignore-absent`` nothing is raised here; absent files are
        skipped one by one during the import instead.
        """

        if self.ignore_absent:
            return

        for record in self.manifest:

            if record["model"] != "documents.document":
                continue

            if "__exported_file_name__" not in record:
                raise CommandError(
                    'The manifest file contains a record which does not '
                    'refer to an actual document file.  If you want to import '
                    'the rest anyway (skipping such references) call the '
                    'importer with --ignore-absent'
                )

            doc_file = record["__exported_file_name__"]
            if not os.path.exists(self._exported_path(record)):
                raise CommandError(
                    'The manifest file refers to "{}" which does not '
                    'appear to be in the source directory.  If you want to '
                    'import the rest anyway (skipping such references) call '
                    'the importer with --ignore-absent'.format(doc_file)
                )

    def _import_files_from_manifest(self):
        """Encrypt each exported file into its document's ``source_path``."""

        for record in self.manifest:

            if record["model"] != "documents.document":
                continue

            doc_path = self._exported_path(record)
            absent = doc_path is None or not os.path.exists(doc_path)
            if absent and self.ignore_absent:
                print("Skipping document {}: exported file is absent".format(
                    record["pk"]))
                continue

            document = Document.objects.get(pk=record["pk"])
            print("Encrypting {} and saving it to {}".format(
                doc_path, document.source_path))

            # Encrypt first, so a failure does not leave a truncated file
            # at the destination.
            with open(doc_path, "rb") as unencrypted:
                payload = GnuPG.encrypted(unencrypted)
            with open(document.source_path, "wb") as encrypted:
                encrypted.write(payload)
							
								
								
									
										19
									
								
								src/documents/migrations/0011_auto_20160303_1929.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								src/documents/migrations/0011_auto_20160303_1929.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,19 @@ | |||||||
|  | # -*- coding: utf-8 -*- | ||||||
|  | # Generated by Django 1.9.2 on 2016-03-03 19:29 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  | 
 | ||||||
|  | from django.db import migrations | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class Migration(migrations.Migration):
    """Rename the ``Sender`` model to ``Correspondent``."""

    # Must run after the migration that introduced the log model.
    dependencies = [("documents", "0010_log")]

    operations = [
        migrations.RenameModel(old_name="Sender", new_name="Correspondent"),
    ]
| @ -28,7 +28,7 @@ class SluggedModel(models.Model): | |||||||
|         return self.name |         return self.name | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class Sender(SluggedModel): | class Correspondent(SluggedModel): | ||||||
| 
 | 
 | ||||||
|     # This regex is probably more restrictive than it needs to be, but it's |     # This regex is probably more restrictive than it needs to be, but it's | ||||||
|     # better safe than sorry. |     # better safe than sorry. | ||||||
| @ -141,7 +141,7 @@ class Document(models.Model): | |||||||
|     TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,) |     TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,) | ||||||
| 
 | 
 | ||||||
|     sender = models.ForeignKey( |     sender = models.ForeignKey( | ||||||
|         Sender, blank=True, null=True, related_name="documents") |         Correspondent, blank=True, null=True, related_name="documents") | ||||||
|     title = models.CharField(max_length=128, blank=True, db_index=True) |     title = models.CharField(max_length=128, blank=True, db_index=True) | ||||||
|     content = models.TextField(db_index=True) |     content = models.TextField(db_index=True) | ||||||
|     file_type = models.CharField( |     file_type = models.CharField( | ||||||
| @ -158,9 +158,9 @@ class Document(models.Model): | |||||||
|         ordering = ("sender", "title") |         ordering = ("sender", "title") | ||||||
| 
 | 
 | ||||||
|     def __str__(self): |     def __str__(self): | ||||||
|         created = self.created.strftime("%Y-%m-%d") |         created = self.created.strftime("%Y%m%d%H%M%S") | ||||||
|         if self.sender and self.title: |         if self.sender and self.title: | ||||||
|             return "{}: {}, {}".format(created, self.sender, self.title) |             return "{}: {} - {}".format(created, self.sender, self.title) | ||||||
|         if self.sender or self.title: |         if self.sender or self.title: | ||||||
|             return "{}: {}".format(created, self.sender or self.title) |             return "{}: {}".format(created, self.sender or self.title) | ||||||
|         return str(created) |         return str(created) | ||||||
| @ -179,13 +179,7 @@ class Document(models.Model): | |||||||
| 
 | 
 | ||||||
|     @property |     @property | ||||||
|     def file_name(self): |     def file_name(self): | ||||||
|         if self.sender and self.title: |         return slugify(str(self)) + "." + self.file_type | ||||||
|             tags = ",".join([t.slug for t in self.tags.all()]) |  | ||||||
|             if tags: |  | ||||||
|                 return "{} - {} - {}.{}".format( |  | ||||||
|                     self.sender, self.title, tags, self.file_type) |  | ||||||
|             return "{} - {}.{}".format(self.sender, self.title, self.file_type) |  | ||||||
|         return os.path.basename(self.source_path) |  | ||||||
| 
 | 
 | ||||||
|     @property |     @property | ||||||
|     def download_url(self): |     def download_url(self): | ||||||
|  | |||||||
| @ -1,12 +1,12 @@ | |||||||
| from rest_framework import serializers | from rest_framework import serializers | ||||||
| 
 | 
 | ||||||
| from .models import Sender, Tag, Document, Log | from .models import Correspondent, Tag, Document, Log | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class SenderSerializer(serializers.HyperlinkedModelSerializer): | class CorrespondentSerializer(serializers.HyperlinkedModelSerializer): | ||||||
| 
 | 
 | ||||||
|     class Meta(object): |     class Meta(object): | ||||||
|         model = Sender |         model = Correspondent | ||||||
|         fields = ("id", "slug", "name") |         fields = ("id", "slug", "name") | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -1,6 +1,5 @@ | |||||||
| from django.contrib.auth.mixins import LoginRequiredMixin | from django.contrib.auth.mixins import LoginRequiredMixin | ||||||
| from django.http import HttpResponse | from django.http import HttpResponse | ||||||
| from django.template.defaultfilters import slugify |  | ||||||
| from django.views.decorators.csrf import csrf_exempt | from django.views.decorators.csrf import csrf_exempt | ||||||
| from django.views.generic import FormView, DetailView, TemplateView | from django.views.generic import FormView, DetailView, TemplateView | ||||||
| 
 | 
 | ||||||
| @ -14,9 +13,9 @@ from rest_framework.viewsets import ( | |||||||
| from paperless.db import GnuPG | from paperless.db import GnuPG | ||||||
| 
 | 
 | ||||||
| from .forms import UploadForm | from .forms import UploadForm | ||||||
| from .models import Sender, Tag, Document, Log | from .models import Correspondent, Tag, Document, Log | ||||||
| from .serialisers import ( | from .serialisers import ( | ||||||
|     SenderSerializer, TagSerializer, DocumentSerializer, LogSerializer) |     CorrespondentSerializer, TagSerializer, DocumentSerializer, LogSerializer) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class IndexView(TemplateView): | class IndexView(TemplateView): | ||||||
| @ -52,7 +51,7 @@ class FetchView(LoginRequiredMixin, DetailView): | |||||||
|             content_type=content_types[self.object.file_type] |             content_type=content_types[self.object.file_type] | ||||||
|         ) |         ) | ||||||
|         response["Content-Disposition"] = 'attachment; filename="{}"'.format( |         response["Content-Disposition"] = 'attachment; filename="{}"'.format( | ||||||
|             slugify(str(self.object)) + "." + self.object.file_type) |             self.object.file_name) | ||||||
| 
 | 
 | ||||||
|         return response |         return response | ||||||
| 
 | 
 | ||||||
| @ -81,10 +80,10 @@ class StandardPagination(PageNumberPagination): | |||||||
|     max_page_size = 100000 |     max_page_size = 100000 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class SenderViewSet(ModelViewSet): | class CorrespondentViewSet(ModelViewSet): | ||||||
|     model = Sender |     model = Correspondent | ||||||
|     queryset = Sender.objects.all() |     queryset = Correspondent.objects.all() | ||||||
|     serializer_class = SenderSerializer |     serializer_class = CorrespondentSerializer | ||||||
|     pagination_class = StandardPagination |     pagination_class = StandardPagination | ||||||
|     permission_classes = (IsAuthenticated,) |     permission_classes = (IsAuthenticated,) | ||||||
| 
 | 
 | ||||||
|  | |||||||
| @ -22,11 +22,11 @@ from rest_framework.routers import DefaultRouter | |||||||
| 
 | 
 | ||||||
| from documents.views import ( | from documents.views import ( | ||||||
|     IndexView, FetchView, PushView, |     IndexView, FetchView, PushView, | ||||||
|     SenderViewSet, TagViewSet, DocumentViewSet, LogViewSet |     CorrespondentViewSet, TagViewSet, DocumentViewSet, LogViewSet | ||||||
| ) | ) | ||||||
| 
 | 
 | ||||||
| router = DefaultRouter() | router = DefaultRouter() | ||||||
| router.register(r'senders', SenderViewSet) | router.register(r'senders', CorrespondentViewSet) | ||||||
| router.register(r'tags', TagViewSet) | router.register(r'tags', TagViewSet) | ||||||
| router.register(r'documents', DocumentViewSet) | router.register(r'documents', DocumentViewSet) | ||||||
| router.register(r'logs', LogViewSet) | router.register(r'logs', LogViewSet) | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user