mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-05-24 02:02:23 -04:00
Unify prompts, cover
This commit is contained in:
parent
014eafe3d1
commit
62fd722019
@ -21,6 +21,7 @@ def build_prompt_without_rag(document: Document) -> str:
|
|||||||
Never ask for further information, additional content or ask questions. Never include any other text.
|
Never ask for further information, additional content or ask questions. Never include any other text.
|
||||||
Suggested tags and document types must be strictly based on the content of the document.
|
Suggested tags and document types must be strictly based on the content of the document.
|
||||||
Do not change the field names or the JSON structure, only provide the values. Use double quotes and proper JSON syntax.
|
Do not change the field names or the JSON structure, only provide the values. Use double quotes and proper JSON syntax.
|
||||||
|
Each field must be a list of plain strings.
|
||||||
|
|
||||||
The JSON object must contain the following fields:
|
The JSON object must contain the following fields:
|
||||||
- title: A short, descriptive title
|
- title: A short, descriptive title
|
||||||
@ -30,8 +31,6 @@ def build_prompt_without_rag(document: Document) -> str:
|
|||||||
- storage_paths: Suggested folder paths (e.g. "Medical/Insurance")
|
- storage_paths: Suggested folder paths (e.g. "Medical/Insurance")
|
||||||
- dates: List up to 3 relevant dates in YYYY-MM-DD format
|
- dates: List up to 3 relevant dates in YYYY-MM-DD format
|
||||||
|
|
||||||
Respond ONLY in JSON.
|
|
||||||
Each field must be a list of plain strings.
|
|
||||||
The format of the JSON object is as follows:
|
The format of the JSON object is as follows:
|
||||||
{{
|
{{
|
||||||
"title": "xxxxx",
|
"title": "xxxxx",
|
||||||
@ -43,7 +42,6 @@ def build_prompt_without_rag(document: Document) -> str:
|
|||||||
}}
|
}}
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
||||||
FILENAME:
|
FILENAME:
|
||||||
{filename}
|
{filename}
|
||||||
|
|
||||||
@ -56,41 +54,9 @@ def build_prompt_without_rag(document: Document) -> str:
|
|||||||
|
|
||||||
def build_prompt_with_rag(document: Document) -> str:
|
def build_prompt_with_rag(document: Document) -> str:
|
||||||
context = get_context_for_document(document)
|
context = get_context_for_document(document)
|
||||||
content = document.content or ""
|
prompt = build_prompt_without_rag(document)
|
||||||
filename = document.filename or ""
|
|
||||||
|
|
||||||
prompt = f"""
|
prompt += f"""
|
||||||
You are a helpful assistant that extracts structured information from documents.
|
|
||||||
You have access to similar documents as context to help improve suggestions.
|
|
||||||
|
|
||||||
Only output valid JSON in the format below. No additional explanations.
|
|
||||||
|
|
||||||
The JSON object must contain:
|
|
||||||
- title: A short, human-readable, descriptive title based on the content
|
|
||||||
- tags: A list of relevant topics
|
|
||||||
- correspondents: People or organizations involved
|
|
||||||
- document_types: Type or category of the document
|
|
||||||
- storage_paths: Suggested folder paths
|
|
||||||
- dates: Up to 3 relevant dates in YYYY-MM-DD
|
|
||||||
|
|
||||||
Respond ONLY in JSON.
|
|
||||||
Each field must be a list of plain strings.
|
|
||||||
The format of the JSON object is as follows:
|
|
||||||
{{
|
|
||||||
"title": "xxxxx",
|
|
||||||
"tags": ["xxxx", "xxxx"],
|
|
||||||
"correspondents": ["xxxx", "xxxx"],
|
|
||||||
"document_types": ["xxxx", "xxxx"],
|
|
||||||
"storage_paths": ["xxxx", "xxxx"],
|
|
||||||
"dates": ["YYYY-MM-DD", "YYYY-MM-DD", "YYYY-MM-DD"],
|
|
||||||
}}
|
|
||||||
|
|
||||||
Here is the document:
|
|
||||||
FILENAME:
|
|
||||||
{filename}
|
|
||||||
|
|
||||||
CONTENT:
|
|
||||||
{content[:4000]}
|
|
||||||
|
|
||||||
CONTEXT FROM SIMILAR DOCUMENTS:
|
CONTEXT FROM SIMILAR DOCUMENTS:
|
||||||
{context[:4000]}
|
{context[:4000]}
|
||||||
|
@ -6,6 +6,8 @@ import pytest
|
|||||||
from django.test import override_settings
|
from django.test import override_settings
|
||||||
|
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
from paperless.ai.ai_classifier import build_prompt_with_rag
|
||||||
|
from paperless.ai.ai_classifier import build_prompt_without_rag
|
||||||
from paperless.ai.ai_classifier import get_ai_document_classification
|
from paperless.ai.ai_classifier import get_ai_document_classification
|
||||||
from paperless.ai.ai_classifier import parse_ai_response
|
from paperless.ai.ai_classifier import parse_ai_response
|
||||||
|
|
||||||
@ -101,3 +103,15 @@ def test_use_without_rag_if_not_configured(
|
|||||||
mock_run_llm_query.return_value.text = json.dumps({})
|
mock_run_llm_query.return_value.text = json.dumps({})
|
||||||
get_ai_document_classification(mock_document)
|
get_ai_document_classification(mock_document)
|
||||||
mock_build_prompt_without_rag.assert_called_once()
|
mock_build_prompt_without_rag.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
@override_settings(
|
||||||
|
LLM_BACKEND="ollama",
|
||||||
|
LLM_MODEL="some_model",
|
||||||
|
)
|
||||||
|
def test_prompt_with_without_rag(mock_document):
|
||||||
|
prompt = build_prompt_without_rag(mock_document)
|
||||||
|
assert "CONTEXT FROM SIMILAR DOCUMENTS:" not in prompt
|
||||||
|
|
||||||
|
prompt = build_prompt_with_rag(mock_document)
|
||||||
|
assert "CONTEXT FROM SIMILAR DOCUMENTS:" in prompt
|
||||||
|
Loading…
x
Reference in New Issue
Block a user