mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2026-03-29 04:42:21 -04:00
* refactor: switch consumer and callers to ParserRegistry (Phase 4) Replace all Django signal-based parser discovery with direct registry calls. Removes `_parser_cleanup`, `parser_is_new_style` shims, and all old-style isinstance checks. All parser instantiation now uses the `with parser_class() as parser:` context manager pattern. - documents/parsers.py: delegate to get_parser_registry(); drop lru_cache - documents/consumer.py: use registry + context manager; remove shims - documents/tasks.py: same pattern - documents/management/commands/document_thumbnails.py: same pattern - documents/views.py: get_metadata uses context manager - documents/checks.py: use get_parser_registry().all_parsers() - paperless/parsers/registry.py: add all_parsers() public method - tests: update mocks to target documents.consumer.get_parser_class_for_mime_type Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * refactor: drop get_parser_class_for_mime_type; callers use registry directly All callers now call get_parser_registry().get_parser_for_file() with the actual filename and path, enabling score() to use file extension hints. The MIME-only helper is removed. - consumer.py: passes self.filename + self.working_copy - tasks.py: passes document.original_filename + document.source_path - document_thumbnails.py: same pattern - views.py: passes Path(file).name + Path(file) - parsers.py: internal helpers inline the registry call with filename="" - test_parsers.py: drop TestParserDiscovery (was testing mock behavior); TestParserAvailability uses registry directly - test_consumer.py: mocks switch to documents.consumer.get_parser_registry Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * refactor: remove document_consumer_declaration signal infrastructure Remove the document_consumer_declaration signal that was previously used for parser registration. Each parser app no longer connects to this signal, and the signal declaration itself has been removed from documents/signals. 
Changes: - Remove document_consumer_declaration from documents/signals/__init__.py - Remove ready() methods and signal imports from all parser app configs - Delete signal shim files (signals.py) from all parser apps: - paperless_tesseract/signals.py - paperless_text/signals.py - paperless_tika/signals.py - paperless_mail/signals.py - paperless_remote/signals.py Parser discovery now happens exclusively through the ParserRegistry system introduced in the previous refactor phases. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * refactor: remove empty paperless_text and paperless_tika Django apps After parser classes were moved to paperless/parsers/ in the plugin refactor, these Django apps contained only empty AppConfig classes with no models, views, tasks, migrations, or other functionality. - Remove paperless_text and paperless_tika from INSTALLED_APPS - Delete empty app directories entirely - Update pyproject.toml test exclusions - Clean stale mypy baseline entries for moved parser files paperless_remote app is retained as it contains meaningful system checks for Azure AI configuration. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * Moves the checks and tests to the main application and removes the old applications * Adds a comment to satisy Sonar * refactor: remove automatic log_summary() call from get_parser_registry() The summary was logged once per process, causing it to appear repeatedly during Docker startup (management commands, web server, each Celery worker subprocess). External parsers are already announced individually at INFO when discovered; the full summary is redundant noise. log_summary() is retained on ParserRegistry for manual/debug use. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> * Cleans up the duplicate test file/fixture * Fixes a race condition where webserver threads could race to populate the registry --------- Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1116 lines
38 KiB
Python
1116 lines
38 KiB
Python
import datetime
|
|
import json
|
|
import logging
|
|
import logging.config
|
|
import math
|
|
import multiprocessing
|
|
import os
|
|
import tempfile
|
|
from pathlib import Path
|
|
from typing import Final
|
|
from urllib.parse import urlparse
|
|
|
|
from compression_middleware.middleware import CompressionMiddleware
|
|
from django.utils.translation import gettext_lazy as _
|
|
from dotenv import load_dotenv
|
|
|
|
from paperless.settings.custom import parse_beat_schedule
|
|
from paperless.settings.custom import parse_dateparser_languages
|
|
from paperless.settings.custom import parse_db_settings
|
|
from paperless.settings.custom import parse_hosting_settings
|
|
from paperless.settings.custom import parse_ignore_dates
|
|
from paperless.settings.custom import parse_redis_url
|
|
from paperless.settings.parsers import get_bool_from_env
|
|
from paperless.settings.parsers import get_float_from_env
|
|
from paperless.settings.parsers import get_int_from_env
|
|
from paperless.settings.parsers import get_list_from_env
|
|
from paperless.settings.parsers import get_path_from_env
|
|
|
|
logger = logging.getLogger("paperless.settings")
|
|
|
|
# Tap paperless.conf if it's available
|
|
# Candidates are probed in order and only the first file that exists is
# loaded; the optional PAPERLESS_CONFIGURATION_PATH override is tried first.
for path in (
    os.getenv("PAPERLESS_CONFIGURATION_PATH"),
    "../paperless.conf",
    "/etc/paperless.conf",
    "/usr/local/etc/paperless.conf",
):
    # Skip an unset/empty candidate as well as a path that does not exist.
    if not path or not Path(path).exists():
        continue
    load_dotenv(path)
    break
|
|
|
|
# There are multiple levels of concurrency in paperless:
|
|
# - Multiple consumers may be run in parallel.
|
|
# - Each consumer may process multiple pages in parallel.
|
|
# - Each Tesseract OCR run may spawn multiple threads to process a single page
|
|
# slightly faster.
|
|
# The performance gains from having tesseract use multiple threads are minimal.
|
|
# However, when multiple pages are processed in parallel, the total number of
|
|
# OCR threads may exceed the number of available cpu cores, which will
|
|
# dramatically slow down the consumption process. This setting limits each
|
|
# Tesseract process to one thread.
|
|
os.environ["OMP_THREAD_LIMIT"] = "1"
|
|
|
|
|
|
# NEVER RUN WITH DEBUG IN PRODUCTION.
|
|
DEBUG = get_bool_from_env("PAPERLESS_DEBUG", "NO")
|
|
|
|
|
|
###############################################################################
|
|
# Directories #
|
|
###############################################################################
|
|
|
|
BASE_DIR: Path = Path(__file__).resolve().parent.parent.parent
|
|
|
|
STATIC_ROOT = get_path_from_env("PAPERLESS_STATICDIR", BASE_DIR.parent / "static")
|
|
|
|
MEDIA_ROOT = get_path_from_env("PAPERLESS_MEDIA_ROOT", BASE_DIR.parent / "media")
|
|
ORIGINALS_DIR = MEDIA_ROOT / "documents" / "originals"
|
|
ARCHIVE_DIR = MEDIA_ROOT / "documents" / "archive"
|
|
THUMBNAIL_DIR = MEDIA_ROOT / "documents" / "thumbnails"
|
|
SHARE_LINK_BUNDLE_DIR = MEDIA_ROOT / "documents" / "share_link_bundles"
|
|
|
|
DATA_DIR = get_path_from_env("PAPERLESS_DATA_DIR", BASE_DIR.parent / "data")
|
|
|
|
NLTK_DIR = get_path_from_env("PAPERLESS_NLTK_DIR", "/usr/share/nltk_data")
|
|
|
|
# Check deprecated setting first
|
|
EMPTY_TRASH_DIR = (
|
|
get_path_from_env("PAPERLESS_TRASH_DIR", os.getenv("PAPERLESS_EMPTY_TRASH_DIR"))
|
|
if os.getenv("PAPERLESS_TRASH_DIR") or os.getenv("PAPERLESS_EMPTY_TRASH_DIR")
|
|
else None
|
|
)
|
|
|
|
# Lock file for synchronizing changes to the MEDIA directory across multiple
|
|
# threads.
|
|
MEDIA_LOCK = MEDIA_ROOT / "media.lock"
|
|
INDEX_DIR = DATA_DIR / "index"
|
|
MODEL_FILE = get_path_from_env(
|
|
"PAPERLESS_MODEL_FILE",
|
|
DATA_DIR / "classification_model.pickle",
|
|
)
|
|
LLM_INDEX_DIR = DATA_DIR / "llm_index"
|
|
|
|
LOGGING_DIR = get_path_from_env("PAPERLESS_LOGGING_DIR", DATA_DIR / "log")
|
|
|
|
CONSUMPTION_DIR = get_path_from_env(
|
|
"PAPERLESS_CONSUMPTION_DIR",
|
|
BASE_DIR.parent / "consume",
|
|
)
|
|
|
|
# This will be created if it doesn't exist
|
|
SCRATCH_DIR = get_path_from_env(
|
|
"PAPERLESS_SCRATCH_DIR",
|
|
Path(tempfile.gettempdir()) / "paperless",
|
|
)
|
|
|
|
###############################################################################
|
|
# Application Definition #
|
|
###############################################################################
|
|
|
|
env_apps = get_list_from_env("PAPERLESS_APPS")
|
|
|
|
INSTALLED_APPS = [
|
|
"whitenoise.runserver_nostatic",
|
|
"django.contrib.auth",
|
|
"django.contrib.contenttypes",
|
|
"django.contrib.sessions",
|
|
"django.contrib.messages",
|
|
"django.contrib.staticfiles",
|
|
"corsheaders",
|
|
"django_extensions",
|
|
"paperless",
|
|
"documents.apps.DocumentsConfig",
|
|
"paperless_mail.apps.PaperlessMailConfig",
|
|
"django.contrib.admin",
|
|
"rest_framework",
|
|
"rest_framework.authtoken",
|
|
"django_filters",
|
|
"django_celery_results",
|
|
"guardian",
|
|
"allauth",
|
|
"allauth.account",
|
|
"allauth.socialaccount",
|
|
"allauth.mfa",
|
|
"allauth.headless",
|
|
"drf_spectacular",
|
|
"drf_spectacular_sidecar",
|
|
"treenode",
|
|
*env_apps,
|
|
]
|
|
|
|
if DEBUG:
|
|
INSTALLED_APPS.append("channels")
|
|
|
|
REST_FRAMEWORK = {
|
|
"DEFAULT_AUTHENTICATION_CLASSES": [
|
|
"paperless.auth.PaperlessBasicAuthentication",
|
|
"rest_framework.authentication.TokenAuthentication",
|
|
"rest_framework.authentication.SessionAuthentication",
|
|
],
|
|
"DEFAULT_VERSIONING_CLASS": "rest_framework.versioning.AcceptHeaderVersioning",
|
|
"DEFAULT_VERSION": "10", # match src-ui/src/environments/environment.prod.ts
|
|
# Make sure these are ordered and that the most recent version appears
|
|
# last. See api.md#api-versioning when adding new versions.
|
|
"ALLOWED_VERSIONS": ["9", "10"],
|
|
# DRF Spectacular default schema
|
|
"DEFAULT_SCHEMA_CLASS": "drf_spectacular.openapi.AutoSchema",
|
|
}
|
|
|
|
if DEBUG:
|
|
REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"].append(
|
|
"paperless.auth.AngularApiAuthenticationOverride",
|
|
)
|
|
|
|
MIDDLEWARE = [
|
|
"django.middleware.security.SecurityMiddleware",
|
|
"whitenoise.middleware.WhiteNoiseMiddleware",
|
|
"django.contrib.sessions.middleware.SessionMiddleware",
|
|
"corsheaders.middleware.CorsMiddleware",
|
|
"django.middleware.locale.LocaleMiddleware",
|
|
"django.middleware.common.CommonMiddleware",
|
|
"django.middleware.csrf.CsrfViewMiddleware",
|
|
"paperless.middleware.ApiVersionMiddleware",
|
|
"django.contrib.auth.middleware.AuthenticationMiddleware",
|
|
"django.contrib.messages.middleware.MessageMiddleware",
|
|
"django.middleware.clickjacking.XFrameOptionsMiddleware",
|
|
"allauth.account.middleware.AccountMiddleware",
|
|
]
|
|
|
|
# Optional to enable compression
|
|
if get_bool_from_env("PAPERLESS_ENABLE_COMPRESSION", "yes"): # pragma: no cover
|
|
MIDDLEWARE.insert(0, "compression_middleware.middleware.CompressionMiddleware")
|
|
|
|
# Workaround to not compress streaming responses (e.g. chat).
|
|
# See https://github.com/friedelwolff/django-compression-middleware/pull/7
|
|
original_process_response = CompressionMiddleware.process_response
|
|
|
|
|
|
def patched_process_response(self, request, response):
    """
    Replacement for ``CompressionMiddleware.process_response`` with an opt-out.

    A request whose ``compress_exempt`` attribute is truthy gets its response
    back untouched; every other request is handed to the saved
    ``original_process_response`` implementation.
    """
    exempt = getattr(request, "compress_exempt", False)
    if not exempt:
        return original_process_response(self, request, response)
    return response
|
|
|
|
|
|
CompressionMiddleware.process_response = patched_process_response
|
|
|
|
ROOT_URLCONF = "paperless.urls"
|
|
|
|
|
|
FORCE_SCRIPT_NAME, BASE_URL, LOGIN_URL, LOGIN_REDIRECT_URL, LOGOUT_REDIRECT_URL = (
|
|
parse_hosting_settings()
|
|
)
|
|
|
|
# DRF Spectacular settings
|
|
SPECTACULAR_SETTINGS = {
|
|
"TITLE": "Paperless-ngx REST API",
|
|
"DESCRIPTION": "OpenAPI Spec for Paperless-ngx",
|
|
"VERSION": "6.0.0",
|
|
"SERVE_INCLUDE_SCHEMA": False,
|
|
"SWAGGER_UI_DIST": "SIDECAR",
|
|
"COMPONENT_SPLIT_REQUEST": True,
|
|
"EXTERNAL_DOCS": {
|
|
"description": "Paperless-ngx API Documentation",
|
|
"url": "https://docs.paperless-ngx.com/api/",
|
|
},
|
|
"ENUM_NAME_OVERRIDES": {
|
|
"MatchingAlgorithm": "documents.models.MatchingModel.MATCHING_ALGORITHMS",
|
|
},
|
|
"SCHEMA_PATH_PREFIX_INSERT": FORCE_SCRIPT_NAME or "",
|
|
}
|
|
|
|
WSGI_APPLICATION = "paperless.wsgi.application"
|
|
ASGI_APPLICATION = "paperless.asgi.application"
|
|
|
|
STATIC_URL = os.getenv("PAPERLESS_STATIC_URL", BASE_URL + "static/")
|
|
WHITENOISE_STATIC_PREFIX = "/static/"
|
|
|
|
STORAGES = {
|
|
"staticfiles": {
|
|
"BACKEND": "whitenoise.storage.CompressedStaticFilesStorage",
|
|
},
|
|
"default": {"BACKEND": "django.core.files.storage.FileSystemStorage"},
|
|
}
|
|
|
|
_CELERY_REDIS_URL, _CHANNELS_REDIS_URL = parse_redis_url(
|
|
os.getenv("PAPERLESS_REDIS", None),
|
|
)
|
|
_REDIS_KEY_PREFIX = os.getenv("PAPERLESS_REDIS_PREFIX", "")
|
|
|
|
TEMPLATES = [
|
|
{
|
|
"BACKEND": "django.template.backends.django.DjangoTemplates",
|
|
"DIRS": [],
|
|
"APP_DIRS": True,
|
|
"OPTIONS": {
|
|
"context_processors": [
|
|
"django.template.context_processors.debug",
|
|
"django.template.context_processors.request",
|
|
"django.contrib.auth.context_processors.auth",
|
|
"django.contrib.messages.context_processors.messages",
|
|
"documents.context_processors.settings",
|
|
],
|
|
},
|
|
},
|
|
]
|
|
|
|
_CHANNELS_BACKEND = os.environ.get(
|
|
"PAPERLESS_CHANNELS_BACKEND",
|
|
"channels_redis.pubsub.RedisPubSubChannelLayer",
|
|
)
|
|
CHANNEL_LAYERS = {
|
|
"default": {
|
|
"BACKEND": _CHANNELS_BACKEND,
|
|
},
|
|
}
|
|
|
|
if _CHANNELS_BACKEND.startswith("channels_redis."):
|
|
CHANNEL_LAYERS["default"]["CONFIG"] = {
|
|
"hosts": [_CHANNELS_REDIS_URL],
|
|
"capacity": 2000, # default 100
|
|
"expiry": 15, # default 60
|
|
"prefix": _REDIS_KEY_PREFIX,
|
|
}
|
|
|
|
###############################################################################
|
|
# Email (SMTP) Backend #
|
|
###############################################################################
|
|
|
|
EMAIL_HOST: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST", "localhost")
|
|
EMAIL_PORT: Final[int] = int(os.getenv("PAPERLESS_EMAIL_PORT", 25))
|
|
EMAIL_HOST_USER: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_USER", "")
|
|
EMAIL_HOST_PASSWORD: Final[str] = os.getenv("PAPERLESS_EMAIL_HOST_PASSWORD", "")
|
|
DEFAULT_FROM_EMAIL: Final[str] = os.getenv("PAPERLESS_EMAIL_FROM", EMAIL_HOST_USER)
|
|
EMAIL_USE_TLS: Final[bool] = get_bool_from_env("PAPERLESS_EMAIL_USE_TLS")
|
|
EMAIL_USE_SSL: Final[bool] = get_bool_from_env("PAPERLESS_EMAIL_USE_SSL")
|
|
EMAIL_SUBJECT_PREFIX: Final[str] = "[Paperless-ngx] "
|
|
EMAIL_TIMEOUT = 30.0
|
|
EMAIL_ENABLED = EMAIL_HOST != "localhost" or EMAIL_HOST_USER != ""
|
|
if DEBUG: # pragma: no cover
|
|
EMAIL_BACKEND = "django.core.mail.backends.filebased.EmailBackend"
|
|
EMAIL_FILE_PATH = BASE_DIR / "sent_emails"
|
|
|
|
###############################################################################
|
|
# Security #
|
|
###############################################################################
|
|
|
|
AUTHENTICATION_BACKENDS = [
|
|
"guardian.backends.ObjectPermissionBackend",
|
|
"django.contrib.auth.backends.ModelBackend",
|
|
"allauth.account.auth_backends.AuthenticationBackend",
|
|
]
|
|
|
|
ACCOUNT_LOGOUT_ON_GET = True
|
|
ACCOUNT_DEFAULT_HTTP_PROTOCOL = os.getenv(
|
|
"PAPERLESS_ACCOUNT_DEFAULT_HTTP_PROTOCOL",
|
|
"https",
|
|
)
|
|
|
|
ACCOUNT_ADAPTER = "paperless.adapter.CustomAccountAdapter"
|
|
ACCOUNT_ALLOW_SIGNUPS = get_bool_from_env("PAPERLESS_ACCOUNT_ALLOW_SIGNUPS")
|
|
ACCOUNT_DEFAULT_GROUPS = get_list_from_env("PAPERLESS_ACCOUNT_DEFAULT_GROUPS")
|
|
|
|
SOCIALACCOUNT_ADAPTER = "paperless.adapter.CustomSocialAccountAdapter"
|
|
SOCIALACCOUNT_ALLOW_SIGNUPS = get_bool_from_env(
|
|
"PAPERLESS_SOCIALACCOUNT_ALLOW_SIGNUPS",
|
|
"yes",
|
|
)
|
|
SOCIALACCOUNT_AUTO_SIGNUP = get_bool_from_env("PAPERLESS_SOCIAL_AUTO_SIGNUP")
|
|
SOCIALACCOUNT_PROVIDERS = json.loads(
|
|
os.getenv("PAPERLESS_SOCIALACCOUNT_PROVIDERS", "{}"),
|
|
)
|
|
SOCIAL_ACCOUNT_DEFAULT_GROUPS = get_list_from_env(
|
|
"PAPERLESS_SOCIAL_ACCOUNT_DEFAULT_GROUPS",
|
|
)
|
|
SOCIAL_ACCOUNT_SYNC_GROUPS = get_bool_from_env("PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS")
|
|
SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM: Final[str] = os.getenv(
|
|
"PAPERLESS_SOCIAL_ACCOUNT_SYNC_GROUPS_CLAIM",
|
|
"groups",
|
|
)
|
|
|
|
HEADLESS_TOKEN_STRATEGY = "paperless.adapter.DrfTokenStrategy"
|
|
|
|
MFA_TOTP_ISSUER = "Paperless-ngx"
|
|
|
|
ACCOUNT_EMAIL_SUBJECT_PREFIX = "[Paperless-ngx] "
|
|
|
|
DISABLE_REGULAR_LOGIN = get_bool_from_env("PAPERLESS_DISABLE_REGULAR_LOGIN")
|
|
REDIRECT_LOGIN_TO_SSO = get_bool_from_env("PAPERLESS_REDIRECT_LOGIN_TO_SSO")
|
|
|
|
AUTO_LOGIN_USERNAME = os.getenv("PAPERLESS_AUTO_LOGIN_USERNAME")
|
|
|
|
ACCOUNT_EMAIL_VERIFICATION = (
|
|
"none"
|
|
if not EMAIL_ENABLED
|
|
else os.getenv(
|
|
"PAPERLESS_ACCOUNT_EMAIL_VERIFICATION",
|
|
"optional",
|
|
)
|
|
)
|
|
|
|
ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS = get_bool_from_env(
|
|
"PAPERLESS_ACCOUNT_EMAIL_UNKNOWN_ACCOUNTS",
|
|
"True",
|
|
)
|
|
|
|
ACCOUNT_SESSION_REMEMBER = get_bool_from_env(
|
|
"PAPERLESS_ACCOUNT_SESSION_REMEMBER",
|
|
"True",
|
|
)
|
|
SESSION_EXPIRE_AT_BROWSER_CLOSE = not ACCOUNT_SESSION_REMEMBER
|
|
SESSION_COOKIE_AGE = int(
|
|
os.getenv("PAPERLESS_SESSION_COOKIE_AGE", 60 * 60 * 24 * 7 * 3),
|
|
)
|
|
# https://docs.djangoproject.com/en/5.1/ref/settings/#std-setting-SESSION_ENGINE
|
|
SESSION_ENGINE = "django.contrib.sessions.backends.cached_db"
|
|
|
|
if AUTO_LOGIN_USERNAME:
|
|
_index = MIDDLEWARE.index("django.contrib.auth.middleware.AuthenticationMiddleware")
|
|
# This overrides everything the auth middleware is doing but still allows
|
|
# regular login in case the provided user does not exist.
|
|
MIDDLEWARE.insert(_index + 1, "paperless.auth.AutoLoginMiddleware")
|
|
|
|
|
|
def _parse_remote_user_settings() -> str:
    """
    Apply the HTTP remote-user options and return the configured header name.

    When PAPERLESS_ENABLE_HTTP_REMOTE_USER or
    PAPERLESS_ENABLE_HTTP_REMOTE_USER_API is enabled, the remote-user
    middleware is appended to ``MIDDLEWARE`` and the remote-user backend is
    placed first in ``AUTHENTICATION_BACKENDS``. When the API variant is
    enabled, the remote-user authentication class is additionally placed first
    in the REST framework's default authentication classes.

    The module-level settings are mutated in place (never rebound), so no
    ``global`` declaration is required.

    Returns:
        The value of PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME, defaulting to
        "HTTP_REMOTE_USER".
    """
    remote_user_enabled = get_bool_from_env("PAPERLESS_ENABLE_HTTP_REMOTE_USER")
    remote_user_api_enabled = get_bool_from_env(
        "PAPERLESS_ENABLE_HTTP_REMOTE_USER_API",
    )

    if remote_user_enabled or remote_user_api_enabled:
        MIDDLEWARE.append("paperless.auth.HttpRemoteUserMiddleware")
        AUTHENTICATION_BACKENDS.insert(
            0,
            "django.contrib.auth.backends.RemoteUserBackend",
        )

    if remote_user_api_enabled:
        api_auth_classes = REST_FRAMEWORK["DEFAULT_AUTHENTICATION_CLASSES"]
        api_auth_classes.insert(
            0,
            "paperless.auth.PaperlessRemoteUserAuthentication",
        )

    return os.getenv(
        "PAPERLESS_HTTP_REMOTE_USER_HEADER_NAME",
        "HTTP_REMOTE_USER",
    )
|
|
|
|
|
|
HTTP_REMOTE_USER_HEADER_NAME = _parse_remote_user_settings()
|
|
|
|
# X-Frame options for embedded PDF display:
|
|
X_FRAME_OPTIONS = "SAMEORIGIN"
|
|
|
|
# The next 3 settings can also be set using just PAPERLESS_URL
|
|
CSRF_TRUSTED_ORIGINS = get_list_from_env("PAPERLESS_CSRF_TRUSTED_ORIGINS")
|
|
|
|
if DEBUG:
|
|
# Allow access from the angular development server during debugging
|
|
CSRF_TRUSTED_ORIGINS.append("http://localhost:4200")
|
|
|
|
# We allow CORS from localhost:8000
|
|
CORS_ALLOWED_ORIGINS = get_list_from_env(
|
|
"PAPERLESS_CORS_ALLOWED_HOSTS",
|
|
default=["http://localhost:8000"],
|
|
)
|
|
|
|
if DEBUG:
|
|
# Allow access from the angular development server during debugging
|
|
CORS_ALLOWED_ORIGINS.append("http://localhost:4200")
|
|
|
|
CORS_ALLOW_CREDENTIALS = True
|
|
|
|
CORS_EXPOSE_HEADERS = [
|
|
"Content-Disposition",
|
|
]
|
|
|
|
ALLOWED_HOSTS = get_list_from_env("PAPERLESS_ALLOWED_HOSTS", default=["*"])
|
|
if ALLOWED_HOSTS != ["*"]:
|
|
# always allow localhost. Necessary e.g. for healthcheck in docker.
|
|
ALLOWED_HOSTS.append("localhost")
|
|
|
|
|
|
def _parse_paperless_url():
    """
    Register PAPERLESS_URL with the CSRF, CORS and allowed-host settings.

    When PAPERLESS_URL is set to a non-empty value, the URL is appended to
    ``CSRF_TRUSTED_ORIGINS`` and ``CORS_ALLOWED_ORIGINS``, and its hostname is
    appended to ``ALLOWED_HOSTS``. The lists are mutated in place.

    Returns:
        The raw value of PAPERLESS_URL, or None when the variable is unset.
    """
    url = os.getenv("PAPERLESS_URL")
    if url:
        CSRF_TRUSTED_ORIGINS.append(url)
        CORS_ALLOWED_ORIGINS.append(url)
        # urlparse() reports hostname=None when the value has no network
        # location (e.g. the scheme was left out); never add None to the
        # list of allowed hosts.
        hostname = urlparse(url).hostname
        if hostname:
            ALLOWED_HOSTS.append(hostname)

    return url
|
|
|
|
|
|
PAPERLESS_URL = _parse_paperless_url()
|
|
|
|
# For use with trusted proxies
|
|
TRUSTED_PROXIES = get_list_from_env("PAPERLESS_TRUSTED_PROXIES")
|
|
|
|
USE_X_FORWARDED_HOST = get_bool_from_env("PAPERLESS_USE_X_FORWARD_HOST", "false")
|
|
USE_X_FORWARDED_PORT = get_bool_from_env("PAPERLESS_USE_X_FORWARD_PORT", "false")
|
|
# Stays None unless PAPERLESS_PROXY_SSL_HEADER is present in the environment,
# in which case its JSON value is decoded and converted to a tuple.
SECURE_PROXY_SSL_HEADER = None
if "PAPERLESS_PROXY_SSL_HEADER" in os.environ:
    SECURE_PROXY_SSL_HEADER = tuple(
        json.loads(os.environ["PAPERLESS_PROXY_SSL_HEADER"]),
    )
|
|
|
|
# The secret key has a default that should be fine so long as you're hosting
|
|
# Paperless on a closed network. However, if you're putting this anywhere
|
|
# public, you should change the key to something unique and verbose.
|
|
SECRET_KEY = os.getenv(
|
|
"PAPERLESS_SECRET_KEY",
|
|
"e11fl1oa-*ytql8p)(06fbj4ukrlo+n7k&q5+$1md7i+mge=ee",
|
|
)
|
|
|
|
AUTH_PASSWORD_VALIDATORS = [
|
|
{
|
|
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",
|
|
},
|
|
{
|
|
"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator",
|
|
},
|
|
{
|
|
"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator",
|
|
},
|
|
{
|
|
"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator",
|
|
},
|
|
]
|
|
|
|
# Disable Django's artificial limit on the number of form fields to submit at
|
|
# once. This is a protection against overloading the server, but since this is
|
|
# a self-hosted sort of gig, the benefits of being able to mass-delete a ton
|
|
# of log entries outweigh the benefits of such a safeguard.
|
|
|
|
DATA_UPLOAD_MAX_NUMBER_FIELDS = None
|
|
|
|
COOKIE_PREFIX = os.getenv("PAPERLESS_COOKIE_PREFIX", "")
|
|
|
|
CSRF_COOKIE_NAME = f"{COOKIE_PREFIX}csrftoken"
|
|
SESSION_COOKIE_NAME = f"{COOKIE_PREFIX}sessionid"
|
|
LANGUAGE_COOKIE_NAME = f"{COOKIE_PREFIX}django_language"
|
|
|
|
EMAIL_CERTIFICATE_FILE = get_path_from_env("PAPERLESS_EMAIL_CERTIFICATE_LOCATION")
|
|
|
|
|
|
###############################################################################
|
|
# Database #
|
|
###############################################################################
|
|
|
|
DATABASES = parse_db_settings(DATA_DIR)
|
|
|
|
if os.getenv("PAPERLESS_DBENGINE") == "mariadb":
|
|
# Silence Django error on old MariaDB versions.
|
|
# VARCHAR can support > 255 in modern versions
|
|
# https://docs.djangoproject.com/en/4.1/ref/checks/#database
|
|
# https://mariadb.com/kb/en/innodb-system-variables/#innodb_large_prefix
|
|
SILENCED_SYSTEM_CHECKS = ["mysql.W003"]
|
|
|
|
DEFAULT_AUTO_FIELD = "django.db.models.AutoField"
|
|
|
|
###############################################################################
|
|
# Internationalization #
|
|
###############################################################################
|
|
|
|
LANGUAGE_CODE = "en-us"
|
|
|
|
LANGUAGES = [
|
|
("en-us", _("English (US)")), # needs to be first to act as fallback language
|
|
("ar-ar", _("Arabic")),
|
|
("af-za", _("Afrikaans")),
|
|
("be-by", _("Belarusian")),
|
|
("bg-bg", _("Bulgarian")),
|
|
("ca-es", _("Catalan")),
|
|
("cs-cz", _("Czech")),
|
|
("da-dk", _("Danish")),
|
|
("de-de", _("German")),
|
|
("el-gr", _("Greek")),
|
|
("en-gb", _("English (GB)")),
|
|
("es-es", _("Spanish")),
|
|
("fa-ir", _("Persian")),
|
|
("fi-fi", _("Finnish")),
|
|
("fr-fr", _("French")),
|
|
("hu-hu", _("Hungarian")),
|
|
("id-id", _("Indonesian")),
|
|
("it-it", _("Italian")),
|
|
("ja-jp", _("Japanese")),
|
|
("ko-kr", _("Korean")),
|
|
("lb-lu", _("Luxembourgish")),
|
|
("no-no", _("Norwegian")),
|
|
("nl-nl", _("Dutch")),
|
|
("pl-pl", _("Polish")),
|
|
("pt-br", _("Portuguese (Brazil)")),
|
|
("pt-pt", _("Portuguese")),
|
|
("ro-ro", _("Romanian")),
|
|
("ru-ru", _("Russian")),
|
|
("sk-sk", _("Slovak")),
|
|
("sl-si", _("Slovenian")),
|
|
("sr-cs", _("Serbian")),
|
|
("sv-se", _("Swedish")),
|
|
("tr-tr", _("Turkish")),
|
|
("uk-ua", _("Ukrainian")),
|
|
("vi-vn", _("Vietnamese")),
|
|
("zh-cn", _("Chinese Simplified")),
|
|
("zh-tw", _("Chinese Traditional")),
|
|
]
|
|
|
|
LOCALE_PATHS = [BASE_DIR / "locale"]
|
|
|
|
TIME_ZONE = os.getenv("PAPERLESS_TIME_ZONE", "UTC")
|
|
|
|
USE_I18N = True
|
|
|
|
USE_L10N = True
|
|
|
|
USE_TZ = True
|
|
|
|
###############################################################################
|
|
# Logging #
|
|
###############################################################################
|
|
|
|
LOGGING_DIR.mkdir(parents=True, exist_ok=True)
|
|
|
|
# os.getenv() returns a string whenever the variable is set, while these values
# are passed as "maxBytes" / "backupCount" to the rotating file handlers in
# LOGGING, so convert them to int explicitly (defaults: 1024 * 1024 and 20).
LOGROTATE_MAX_SIZE = int(os.getenv("PAPERLESS_LOGROTATE_MAX_SIZE", 1024 * 1024))
LOGROTATE_MAX_BACKUPS = int(os.getenv("PAPERLESS_LOGROTATE_MAX_BACKUPS", 20))
|
|
|
|
LOGGING = {
|
|
"version": 1,
|
|
"disable_existing_loggers": False,
|
|
"formatters": {
|
|
"verbose": {
|
|
"format": "[{asctime}] [{levelname}] [{name}] {message}",
|
|
"style": "{",
|
|
},
|
|
"simple": {
|
|
"format": "{levelname} {message}",
|
|
"style": "{",
|
|
},
|
|
},
|
|
"handlers": {
|
|
"console": {
|
|
"level": "DEBUG" if DEBUG else "INFO",
|
|
"class": "logging.StreamHandler",
|
|
"formatter": "verbose",
|
|
},
|
|
"file_paperless": {
|
|
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
|
|
"formatter": "verbose",
|
|
"filename": LOGGING_DIR / "paperless.log",
|
|
"maxBytes": LOGROTATE_MAX_SIZE,
|
|
"backupCount": LOGROTATE_MAX_BACKUPS,
|
|
},
|
|
"file_mail": {
|
|
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
|
|
"formatter": "verbose",
|
|
"filename": LOGGING_DIR / "mail.log",
|
|
"maxBytes": LOGROTATE_MAX_SIZE,
|
|
"backupCount": LOGROTATE_MAX_BACKUPS,
|
|
},
|
|
"file_celery": {
|
|
"class": "concurrent_log_handler.ConcurrentRotatingFileHandler",
|
|
"formatter": "verbose",
|
|
"filename": LOGGING_DIR / "celery.log",
|
|
"maxBytes": LOGROTATE_MAX_SIZE,
|
|
"backupCount": LOGROTATE_MAX_BACKUPS,
|
|
},
|
|
},
|
|
"root": {"handlers": ["console"]},
|
|
"loggers": {
|
|
"paperless": {"handlers": ["file_paperless"], "level": "DEBUG"},
|
|
"paperless_mail": {"handlers": ["file_mail"], "level": "DEBUG"},
|
|
"paperless_ai": {"handlers": ["file_paperless"], "level": "DEBUG"},
|
|
"ocrmypdf": {"handlers": ["file_paperless"], "level": "INFO"},
|
|
"celery": {"handlers": ["file_celery"], "level": "DEBUG"},
|
|
"kombu": {"handlers": ["file_celery"], "level": "DEBUG"},
|
|
"_granian": {"handlers": ["file_paperless"], "level": "DEBUG"},
|
|
"granian.access": {"handlers": ["file_paperless"], "level": "DEBUG"},
|
|
},
|
|
}
|
|
|
|
# Configure logging before calling any logger in settings.py so it will respect the log format, even if Django has not parsed the settings yet.
|
|
logging.config.dictConfig(LOGGING)
|
|
|
|
|
|
###############################################################################
|
|
# Task queue #
|
|
###############################################################################
|
|
|
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html
|
|
|
|
CELERY_BROKER_URL = _CELERY_REDIS_URL
|
|
CELERY_TIMEZONE = TIME_ZONE
|
|
|
|
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
|
|
CELERY_WORKER_CONCURRENCY: Final[int] = get_int_from_env("PAPERLESS_TASK_WORKERS", 1)
|
|
TASK_WORKERS = CELERY_WORKER_CONCURRENCY
|
|
CELERY_WORKER_MAX_TASKS_PER_CHILD = 1
|
|
CELERY_WORKER_SEND_TASK_EVENTS = True
|
|
CELERY_TASK_SEND_SENT_EVENT = True
|
|
CELERY_SEND_TASK_SENT_EVENT = True
|
|
CELERY_BROKER_CONNECTION_RETRY = True
|
|
CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True
|
|
CELERY_BROKER_TRANSPORT_OPTIONS = {
|
|
"global_keyprefix": _REDIS_KEY_PREFIX,
|
|
}
|
|
|
|
CELERY_TASK_TRACK_STARTED = True
|
|
CELERY_TASK_TIME_LIMIT: Final[int] = get_int_from_env("PAPERLESS_WORKER_TIMEOUT", 1800)
|
|
|
|
CELERY_RESULT_EXTENDED = True
|
|
CELERY_RESULT_BACKEND = "django-db"
|
|
CELERY_CACHE_BACKEND = "default"
|
|
|
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#task-serializer
|
|
CELERY_TASK_SERIALIZER = "pickle"
|
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#std-setting-accept_content
|
|
CELERY_ACCEPT_CONTENT = ["application/json", "application/x-python-serialize"]
|
|
|
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule
|
|
CELERY_BEAT_SCHEDULE = parse_beat_schedule()
|
|
|
|
# https://docs.celeryq.dev/en/stable/userguide/configuration.html#beat-schedule-filename
|
|
CELERY_BEAT_SCHEDULE_FILENAME = str(DATA_DIR / "celerybeat-schedule.db")
|
|
|
|
|
|
# Cachalot: Database read cache.
|
|
def _parse_cachalot_settings():
    """
    Collect the Cachalot (database read cache) settings from the environment.

    The TTL comes from PAPERLESS_READ_CACHE_TTL; a non-positive value is
    replaced by the default of 3600 and larger values are capped at 31536000.
    The cache URL comes from PAPERLESS_READ_CACHE_REDIS_URL, falling back to
    ``_CHANNELS_REDIS_URL``, and is normalised through ``parse_redis_url``.

    Returns:
        A dict keyed by the ``CACHALOT_*`` setting names.
    """
    default_ttl = 3600
    max_ttl = 31536000

    timeout = get_int_from_env("PAPERLESS_READ_CACHE_TTL", default_ttl)
    timeout = default_ttl if timeout <= 0 else min(timeout, max_ttl)

    # Only the second URL returned by parse_redis_url() is needed here.
    _unused, read_cache_url = parse_redis_url(
        os.getenv("PAPERLESS_READ_CACHE_REDIS_URL", _CHANNELS_REDIS_URL),
    )

    return {
        "CACHALOT_CACHE": "read-cache",
        "CACHALOT_ENABLED": get_bool_from_env(
            "PAPERLESS_DB_READ_CACHE_ENABLED",
            default="no",
        ),
        "CACHALOT_FINAL_SQL_CHECK": True,
        "CACHALOT_QUERY_KEYGEN": "paperless.db_cache.custom_get_query_cache_key",
        "CACHALOT_TABLE_KEYGEN": "paperless.db_cache.custom_get_table_cache_key",
        "CACHALOT_REDIS_URL": read_cache_url,
        "CACHALOT_TIMEOUT": timeout,
    }
|
|
|
|
|
|
cachalot_settings = _parse_cachalot_settings()
|
|
CACHALOT_ENABLED = cachalot_settings["CACHALOT_ENABLED"]
|
|
if CACHALOT_ENABLED: # pragma: no cover
|
|
INSTALLED_APPS.append("cachalot")
|
|
CACHALOT_CACHE = cachalot_settings["CACHALOT_CACHE"]
|
|
CACHALOT_TIMEOUT = cachalot_settings["CACHALOT_TIMEOUT"]
|
|
CACHALOT_QUERY_KEYGEN = cachalot_settings["CACHALOT_QUERY_KEYGEN"]
|
|
CACHALOT_TABLE_KEYGEN = cachalot_settings["CACHALOT_TABLE_KEYGEN"]
|
|
CACHALOT_FINAL_SQL_CHECK = cachalot_settings["CACHALOT_FINAL_SQL_CHECK"]
|
|
|
|
|
|
# Django default & Cachalot cache configuration
|
|
_CACHE_BACKEND = os.environ.get(
|
|
"PAPERLESS_CACHE_BACKEND",
|
|
"django.core.cache.backends.locmem.LocMemCache"
|
|
if DEBUG
|
|
else "django.core.cache.backends.redis.RedisCache",
|
|
)
|
|
|
|
|
|
def _parse_caches():
    """
    Build the cache configuration mapping with two aliases.

    Both the "default" and the "read-cache" alias use ``_CACHE_BACKEND`` and
    ``_REDIS_KEY_PREFIX``; they differ only in their LOCATION, which is
    ``_CHANNELS_REDIS_URL`` for "default" and the Cachalot Redis URL for
    "read-cache".
    """
    locations = {
        "default": _CHANNELS_REDIS_URL,
        "read-cache": cachalot_settings["CACHALOT_REDIS_URL"],
    }
    return {
        alias: {
            "BACKEND": _CACHE_BACKEND,
            "LOCATION": location,
            "KEY_PREFIX": _REDIS_KEY_PREFIX,
        }
        for alias, location in locations.items()
    }
|
|
|
|
|
|
CACHES = _parse_caches()
|
|
|
|
|
|
def default_threads_per_worker(task_workers) -> int:
    """
    Return the default number of threads each task worker may use.

    The detected CPU cores are divided between ``task_workers`` (rounding
    down); the result is never lower than 1. No core is held in reserve.

    ``multiprocessing.cpu_count()`` raises ``NotImplementedError`` when the
    core count cannot be determined; in that case 1 is returned.
    """
    try:
        # The lookup has to sit inside the ``try``: it is the only call in
        # this function that can raise NotImplementedError.
        available_cores = max(multiprocessing.cpu_count(), 1)
    except NotImplementedError:
        return 1
    return max(math.floor(available_cores / task_workers), 1)
|
|
|
|
|
|
# Threads available to each task worker. An explicit
# PAPERLESS_THREADS_PER_WORKER takes precedence; otherwise the value is derived
# from the CPU count and CELERY_WORKER_CONCURRENCY.
# Read with get_int_from_env so the setting is an int in both cases
# (os.getenv would return a str whenever the variable is set).
THREADS_PER_WORKER = get_int_from_env(
    "PAPERLESS_THREADS_PER_WORKER",
    default_threads_per_worker(CELERY_WORKER_CONCURRENCY),
)
|
|
|
|
###############################################################################
|
|
# Paperless Specific Settings #
|
|
###############################################################################
|
|
|
|
# Fixed list of file and directory name patterns to ignore.
# NOTE(review): the entries look like glob patterns for OS / sync-tool
# metadata; how they are matched is decided by the code that reads this list.
IGNORABLE_FILES: Final[list[str]] = [
    ".DS_Store",
    ".DS_STORE",
    "._*",
    ".stfolder/*",
    ".stversions/*",
    ".localized/*",
    "desktop.ini",
    "@eaDir/*",
    "Thumbs.db",
]
|
|
|
|
# Both values are converted to float; the defaults are 0 and 5 respectively.
CONSUMER_POLLING_INTERVAL = float(os.getenv("PAPERLESS_CONSUMER_POLLING_INTERVAL", 0))

CONSUMER_STABILITY_DELAY = float(os.getenv("PAPERLESS_CONSUMER_STABILITY_DELAY", 5))

CONSUMER_DELETE_DUPLICATES = get_bool_from_env("PAPERLESS_CONSUMER_DELETE_DUPLICATES")

CONSUMER_RECURSIVE = get_bool_from_env("PAPERLESS_CONSUMER_RECURSIVE")

# Ignore regex patterns, matched against filename only.
# The variable holds a JSON document; an unset variable yields an empty list.
CONSUMER_IGNORE_PATTERNS = list(
    json.loads(os.getenv("PAPERLESS_CONSUMER_IGNORE_PATTERNS", "[]")),
)

# Directories to always ignore. These are matched by directory name, not full path.
# Same JSON encoding as above; an unset variable yields an empty list.
CONSUMER_IGNORE_DIRS = list(
    json.loads(os.getenv("PAPERLESS_CONSUMER_IGNORE_DIRS", "[]")),
)

CONSUMER_SUBDIRS_AS_TAGS = get_bool_from_env("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
|
|
|
|
# Barcode-related consumer settings. The boolean switches below are read with
# get_bool_from_env without passing an explicit default.
CONSUMER_ENABLE_BARCODES: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_ENABLE_BARCODES",
)

CONSUMER_BARCODE_TIFF_SUPPORT: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_BARCODE_TIFF_SUPPORT",
)

CONSUMER_BARCODE_STRING: Final[str] = os.getenv(
    "PAPERLESS_CONSUMER_BARCODE_STRING",
    "PATCHT",
)

CONSUMER_ENABLE_ASN_BARCODE: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE",
)

CONSUMER_ASN_BARCODE_PREFIX: Final[str] = os.getenv(
    "PAPERLESS_CONSUMER_ASN_BARCODE_PREFIX",
    "ASN",
)

CONSUMER_BARCODE_UPSCALE: Final[float] = get_float_from_env(
    "PAPERLESS_CONSUMER_BARCODE_UPSCALE",
    0.0,
)

CONSUMER_BARCODE_DPI: Final[int] = get_int_from_env(
    "PAPERLESS_CONSUMER_BARCODE_DPI",
    300,
)

CONSUMER_BARCODE_MAX_PAGES: Final[int] = get_int_from_env(
    "PAPERLESS_CONSUMER_BARCODE_MAX_PAGES",
    0,
)

CONSUMER_BARCODE_RETAIN_SPLIT_PAGES = get_bool_from_env(
    "PAPERLESS_CONSUMER_BARCODE_RETAIN_SPLIT_PAGES",
)

CONSUMER_ENABLE_TAG_BARCODE: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_ENABLE_TAG_BARCODE",
)

# JSON object read from the environment. The default maps the pattern
# "TAG:(.*)" to the replacement "\g<1>" (the backslash is escaped once for
# Python and once more for JSON).
CONSUMER_TAG_BARCODE_MAPPING = dict(
    json.loads(
        os.getenv(
            "PAPERLESS_CONSUMER_TAG_BARCODE_MAPPING",
            '{"TAG:(.*)": "\\\\g<1>"}',
        ),
    ),
)

CONSUMER_TAG_BARCODE_SPLIT: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_TAG_BARCODE_SPLIT",
)
|
|
|
|
# Double-sided collation settings.
CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_ENABLE_COLLATE_DOUBLE_SIDED",
)

CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME: Final[str] = os.getenv(
    "PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_SUBDIR_NAME",
    "double-sided",
)

CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT: Final[bool] = get_bool_from_env(
    "PAPERLESS_CONSUMER_COLLATE_DOUBLE_SIDED_TIFF_SUPPORT",
)

# Fixed one-element tuple; not configurable through the environment.
CONSUMER_PDF_RECOVERABLE_MIME_TYPES = ("application/octet-stream",)
|
|
|
|
# No default is passed here, so an unset variable yields whatever
# get_int_from_env returns for a missing key.
OCR_PAGES = get_int_from_env("PAPERLESS_OCR_PAGES")

# The default language that tesseract will attempt to use when parsing
# documents. It should be a 3-letter language code consistent with ISO 639.
OCR_LANGUAGE = os.getenv("PAPERLESS_OCR_LANGUAGE", "eng")

# OCRmyPDF --output-type options are available.
OCR_OUTPUT_TYPE = os.getenv("PAPERLESS_OCR_OUTPUT_TYPE", "pdfa")

# skip, redo, force
OCR_MODE = os.getenv("PAPERLESS_OCR_MODE", "skip")

OCR_SKIP_ARCHIVE_FILE = os.getenv("PAPERLESS_OCR_SKIP_ARCHIVE_FILE", "never")

OCR_IMAGE_DPI = get_int_from_env("PAPERLESS_OCR_IMAGE_DPI")

OCR_CLEAN = os.getenv("PAPERLESS_OCR_CLEAN", "clean")

OCR_DESKEW: Final[bool] = get_bool_from_env("PAPERLESS_OCR_DESKEW", "true")

OCR_ROTATE_PAGES: Final[bool] = get_bool_from_env("PAPERLESS_OCR_ROTATE_PAGES", "true")

OCR_ROTATE_PAGES_THRESHOLD: Final[float] = get_float_from_env(
    "PAPERLESS_OCR_ROTATE_PAGES_THRESHOLD",
    12.0,
)

OCR_MAX_IMAGE_PIXELS: Final[int | None] = get_int_from_env(
    "PAPERLESS_OCR_MAX_IMAGE_PIXELS",
)

OCR_COLOR_CONVERSION_STRATEGY = os.getenv(
    "PAPERLESS_OCR_COLOR_CONVERSION_STRATEGY",
    "RGB",
)

# None when the variable is not set.
OCR_USER_ARGS = os.getenv("PAPERLESS_OCR_USER_ARGS")

MAX_IMAGE_PIXELS: Final[int | None] = get_int_from_env(
    "PAPERLESS_MAX_IMAGE_PIXELS",
)
|
|
|
|
# GNUPG needs a home directory; use $HOME and fall back to /tmp if it is unset.
GNUPG_HOME = os.getenv("HOME", "/tmp")

# Convert is part of the ImageMagick package
CONVERT_BINARY = os.getenv("PAPERLESS_CONVERT_BINARY", "convert")
CONVERT_TMPDIR = os.getenv("PAPERLESS_CONVERT_TMPDIR")
CONVERT_MEMORY_LIMIT = os.getenv("PAPERLESS_CONVERT_MEMORY_LIMIT")

GS_BINARY = os.getenv("PAPERLESS_GS_BINARY", "gs")

# Fallback layout for .eml consumption
EMAIL_PARSE_DEFAULT_LAYOUT = get_int_from_env(
    "PAPERLESS_EMAIL_PARSE_DEFAULT_LAYOUT",
    1,  # MailRule.PdfLayout.TEXT_HTML but that can't be imported here
)

# Optional pre- and post-consumption scripts; both are None when unset.
PRE_CONSUME_SCRIPT = os.getenv("PAPERLESS_PRE_CONSUME_SCRIPT")
POST_CONSUME_SCRIPT = os.getenv("PAPERLESS_POST_CONSUME_SCRIPT")

# Specify the default date order (for autodetected dates)
DATE_ORDER = os.getenv("PAPERLESS_DATE_ORDER", "DMY")
FILENAME_DATE_ORDER = os.getenv("PAPERLESS_FILENAME_DATE_ORDER")
|
|
|
|
|
|
# If not set, we will infer it at runtime.
# An unset or empty variable leaves this as None; otherwise the raw value is
# handed to parse_dateparser_languages.
DATE_PARSER_LANGUAGES = (
    parse_dateparser_languages(os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES"))
    if os.getenv("PAPERLESS_DATE_PARSER_LANGUAGES")
    else None
)

# Maximum number of dates taken from document start to end to show as
# suggestions for `created` date in the frontend. Duplicates are removed,
# which can result in fewer dates shown.
NUMBER_OF_SUGGESTED_DATES = get_int_from_env("PAPERLESS_NUMBER_OF_SUGGESTED_DATES", 3)
|
|
|
|
# Specify the filename format for output files (None when unset).
FILENAME_FORMAT = os.getenv("PAPERLESS_FILENAME_FORMAT")

# If this is enabled, variables in filename format will resolve to
# empty-string instead of 'none'.
# Directories with 'empty names' are omitted, too.
FILENAME_FORMAT_REMOVE_NONE = get_bool_from_env(
    "PAPERLESS_FILENAME_FORMAT_REMOVE_NONE",
    "NO",
)

# Path of the font file used for thumbnails.
THUMBNAIL_FONT_NAME = os.getenv(
    "PAPERLESS_THUMBNAIL_FONT_NAME",
    "/usr/share/fonts/liberation/LiberationSerif-Regular.ttf",
)
|
|
|
|
# Tika settings (disabled by default; both endpoints default to localhost).
TIKA_ENABLED = get_bool_from_env("PAPERLESS_TIKA_ENABLED", "NO")
TIKA_ENDPOINT = os.getenv("PAPERLESS_TIKA_ENDPOINT", "http://localhost:9998")
TIKA_GOTENBERG_ENDPOINT = os.getenv(
    "PAPERLESS_TIKA_GOTENBERG_ENDPOINT",
    "http://localhost:3000",
)

# The Tika parser is now integrated into the main parser registry, so no
# separate Django app needs to be registered here.
|
|
|
|
# Audit logging is on by default. When enabled, the auditlog app and its
# middleware are appended to the lists Django loads.
AUDIT_LOG_ENABLED = get_bool_from_env("PAPERLESS_AUDIT_LOG_ENABLED", "true")
if AUDIT_LOG_ENABLED:
    INSTALLED_APPS.append("auditlog")
    MIDDLEWARE.append("auditlog.middleware.AuditlogMiddleware")
|
|
|
|
|
|
# List dates that should be ignored when trying to parse date from document text.
# Starts out empty and is only replaced when PAPERLESS_IGNORE_DATES is set.
IGNORE_DATES: set[datetime.date] = set()

if os.getenv("PAPERLESS_IGNORE_DATES") is not None:
    IGNORE_DATES = parse_ignore_dates(os.getenv("PAPERLESS_IGNORE_DATES"), DATE_ORDER)

# Holds the string "default" unless the variable is set to something else, in
# which case the value is re-read as a boolean.
ENABLE_UPDATE_CHECK = os.getenv("PAPERLESS_ENABLE_UPDATE_CHECK", "default")
if ENABLE_UPDATE_CHECK != "default":
    ENABLE_UPDATE_CHECK = get_bool_from_env("PAPERLESS_ENABLE_UPDATE_CHECK")

# Optional branding overrides; both are None when unset.
APP_TITLE = os.getenv("PAPERLESS_APP_TITLE")
APP_LOGO = os.getenv("PAPERLESS_APP_LOGO")
|
|
|
|
###############################################################################
|
|
# Machine Learning #
|
|
###############################################################################
|
|
|
|
|
|
def _get_nltk_language_setting(ocr_lang: str) -> str | None:
|
|
"""
|
|
Maps an ISO-639-1 language code supported by Tesseract into
|
|
an optional NLTK language name. This is the set of common supported
|
|
languages for all the NLTK data used.
|
|
|
|
Assumption: The primary language is first
|
|
|
|
NLTK Languages:
|
|
- https://www.nltk.org/api/nltk.stem.snowball.html#nltk.stem.snowball.SnowballStemmer
|
|
- https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/tokenizers/punkt.zip
|
|
- https://raw.githubusercontent.com/nltk/nltk_data/gh-pages/packages/corpora/stopwords.zip
|
|
|
|
The common intersection between all languages in those 3 is handled here
|
|
|
|
"""
|
|
ocr_lang = ocr_lang.split("+", maxsplit=1)[0]
|
|
iso_code_to_nltk = {
|
|
"dan": "danish",
|
|
"nld": "dutch",
|
|
"eng": "english",
|
|
"fin": "finnish",
|
|
"fra": "french",
|
|
"deu": "german",
|
|
"ita": "italian",
|
|
"nor": "norwegian",
|
|
"por": "portuguese",
|
|
"rus": "russian",
|
|
"spa": "spanish",
|
|
"swe": "swedish",
|
|
}
|
|
|
|
return iso_code_to_nltk.get(ocr_lang)
|
|
|
|
|
|
# NLTK is enabled unless PAPERLESS_ENABLE_NLTK says otherwise.
NLTK_ENABLED: Final[bool] = get_bool_from_env("PAPERLESS_ENABLE_NLTK", "yes")

# Derived from OCR_LANGUAGE; None when its first language has no mapping.
NLTK_LANGUAGE: str | None = _get_nltk_language_setting(OCR_LANGUAGE)
|
|
|
|
###############################################################################
|
|
# Email Preprocessors #
|
|
###############################################################################
|
|
|
|
# None when PAPERLESS_EMAIL_GNUPG_HOME is not set.
EMAIL_GNUPG_HOME: Final[str | None] = os.getenv("PAPERLESS_EMAIL_GNUPG_HOME")
EMAIL_ENABLE_GPG_DECRYPTOR: Final[bool] = get_bool_from_env(
    "PAPERLESS_ENABLE_GPG_DECRYPTOR",
)


###############################################################################
# Soft Delete                                                                 #
###############################################################################
# Defaults to 30 and is never allowed to drop below 1.
EMPTY_TRASH_DELAY = max(get_int_from_env("PAPERLESS_EMPTY_TRASH_DELAY", 30), 1)
|
|
|
|
|
|
###############################################################################
|
|
# Oauth Email #
|
|
###############################################################################
|
|
OAUTH_CALLBACK_BASE_URL = os.getenv("PAPERLESS_OAUTH_CALLBACK_BASE_URL")

# A provider counts as enabled only when a base URL is available (the OAuth
# callback base URL or, failing that, PAPERLESS_URL) and both its client ID
# and client secret are set.
GMAIL_OAUTH_CLIENT_ID = os.getenv("PAPERLESS_GMAIL_OAUTH_CLIENT_ID")
GMAIL_OAUTH_CLIENT_SECRET = os.getenv("PAPERLESS_GMAIL_OAUTH_CLIENT_SECRET")
GMAIL_OAUTH_ENABLED = bool(
    (OAUTH_CALLBACK_BASE_URL or PAPERLESS_URL)
    and GMAIL_OAUTH_CLIENT_ID
    and GMAIL_OAUTH_CLIENT_SECRET,
)

OUTLOOK_OAUTH_CLIENT_ID = os.getenv("PAPERLESS_OUTLOOK_OAUTH_CLIENT_ID")
OUTLOOK_OAUTH_CLIENT_SECRET = os.getenv("PAPERLESS_OUTLOOK_OAUTH_CLIENT_SECRET")
OUTLOOK_OAUTH_ENABLED = bool(
    (OAUTH_CALLBACK_BASE_URL or PAPERLESS_URL)
    and OUTLOOK_OAUTH_CLIENT_ID
    and OUTLOOK_OAUTH_CLIENT_SECRET,
)
|
|
|
|
###############################################################################
|
|
# Webhooks
|
|
###############################################################################
|
|
# Allowed URL schemes, lower-cased; defaults to http and https.
WEBHOOKS_ALLOWED_SCHEMES = {
    scheme.lower()
    for scheme in get_list_from_env(
        "PAPERLESS_WEBHOOKS_ALLOWED_SCHEMES",
        default=["http", "https"],
    )
}

# Allowed ports as integers; the set is empty when nothing is configured.
WEBHOOKS_ALLOWED_PORTS = {
    int(port)
    for port in get_list_from_env("PAPERLESS_WEBHOOKS_ALLOWED_PORTS", default=[])
}

# Enabled by default.
WEBHOOKS_ALLOW_INTERNAL_REQUESTS = get_bool_from_env(
    "PAPERLESS_WEBHOOKS_ALLOW_INTERNAL_REQUESTS",
    "true",
)
|
|
|
|
###############################################################################
|
|
# Remote Parser #
|
|
###############################################################################
|
|
# Remote OCR configuration; each value is None when its variable is unset.
REMOTE_OCR_ENGINE = os.getenv("PAPERLESS_REMOTE_OCR_ENGINE")
REMOTE_OCR_API_KEY = os.getenv("PAPERLESS_REMOTE_OCR_API_KEY")
REMOTE_OCR_ENDPOINT = os.getenv("PAPERLESS_REMOTE_OCR_ENDPOINT")

################################################################################
# AI Settings                                                                  #
################################################################################
# AI features are off unless explicitly enabled.
AI_ENABLED = get_bool_from_env("PAPERLESS_AI_ENABLED", "NO")

# "huggingface" or "openai"
LLM_EMBEDDING_BACKEND = os.getenv("PAPERLESS_AI_LLM_EMBEDDING_BACKEND")
LLM_EMBEDDING_MODEL = os.getenv("PAPERLESS_AI_LLM_EMBEDDING_MODEL")

# "ollama" or "openai"
LLM_BACKEND = os.getenv("PAPERLESS_AI_LLM_BACKEND")
LLM_MODEL = os.getenv("PAPERLESS_AI_LLM_MODEL")
LLM_API_KEY = os.getenv("PAPERLESS_AI_LLM_API_KEY")
LLM_ENDPOINT = os.getenv("PAPERLESS_AI_LLM_ENDPOINT")

# Enabled by default.
LLM_ALLOW_INTERNAL_ENDPOINTS = get_bool_from_env(
    "PAPERLESS_AI_LLM_ALLOW_INTERNAL_ENDPOINTS",
    "true",
)
|