mirror of
https://github.com/paperless-ngx/paperless-ngx.git
synced 2025-05-24 02:02:23 -04:00
Better respect perms for ai suggestions
This commit is contained in:
parent
12e89088d7
commit
62588e9819
@ -756,7 +756,7 @@ class DocumentViewSet(
|
|||||||
refresh_suggestions_cache(doc.pk)
|
refresh_suggestions_cache(doc.pk)
|
||||||
return Response(cached_llm_suggestions.suggestions)
|
return Response(cached_llm_suggestions.suggestions)
|
||||||
|
|
||||||
llm_suggestions = get_ai_document_classification(doc)
|
llm_suggestions = get_ai_document_classification(doc, request.user)
|
||||||
|
|
||||||
matched_tags = match_tags_by_name(
|
matched_tags = match_tags_by_name(
|
||||||
llm_suggestions.get("tags", []),
|
llm_suggestions.get("tags", []),
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from django.contrib.auth.models import User
|
||||||
from llama_index.core.base.llms.types import CompletionResponse
|
from llama_index.core.base.llms.types import CompletionResponse
|
||||||
|
|
||||||
from documents.models import Document
|
from documents.models import Document
|
||||||
|
from documents.permissions import get_objects_for_user_owner_aware
|
||||||
from paperless.ai.client import AIClient
|
from paperless.ai.client import AIClient
|
||||||
from paperless.ai.indexing import query_similar_documents
|
from paperless.ai.indexing import query_similar_documents
|
||||||
from paperless.config import AIConfig
|
from paperless.config import AIConfig
|
||||||
@ -52,8 +54,8 @@ def build_prompt_without_rag(document: Document) -> str:
|
|||||||
return prompt
|
return prompt
|
||||||
|
|
||||||
|
|
||||||
def build_prompt_with_rag(document: Document) -> str:
|
def build_prompt_with_rag(document: Document, user: User | None = None) -> str:
|
||||||
context = get_context_for_document(document)
|
context = get_context_for_document(document, user)
|
||||||
prompt = build_prompt_without_rag(document)
|
prompt = build_prompt_without_rag(document)
|
||||||
|
|
||||||
prompt += f"""
|
prompt += f"""
|
||||||
@ -65,8 +67,26 @@ def build_prompt_with_rag(document: Document) -> str:
|
|||||||
return prompt
|
return prompt
|
||||||
|
|
||||||
|
|
||||||
def get_context_for_document(doc: Document, max_docs: int = 5) -> str:
|
def get_context_for_document(
|
||||||
similar_docs = query_similar_documents(doc)[:max_docs]
|
doc: Document,
|
||||||
|
user: User | None = None,
|
||||||
|
max_docs: int = 5,
|
||||||
|
) -> str:
|
||||||
|
visible_documents = (
|
||||||
|
get_objects_for_user_owner_aware(
|
||||||
|
user,
|
||||||
|
"view_document",
|
||||||
|
Document,
|
||||||
|
)
|
||||||
|
if user
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
similar_docs = query_similar_documents(
|
||||||
|
document=doc,
|
||||||
|
document_ids=[document.pk for document in visible_documents]
|
||||||
|
if visible_documents
|
||||||
|
else None,
|
||||||
|
)[:max_docs]
|
||||||
context_blocks = []
|
context_blocks = []
|
||||||
for similar in similar_docs:
|
for similar in similar_docs:
|
||||||
text = similar.content or ""
|
text = similar.content or ""
|
||||||
@ -91,11 +111,14 @@ def parse_ai_response(response: CompletionResponse) -> dict:
|
|||||||
return {}
|
return {}
|
||||||
|
|
||||||
|
|
||||||
def get_ai_document_classification(document: Document) -> dict:
|
def get_ai_document_classification(
|
||||||
|
document: Document,
|
||||||
|
user: User | None = None,
|
||||||
|
) -> dict:
|
||||||
ai_config = AIConfig()
|
ai_config = AIConfig()
|
||||||
|
|
||||||
prompt = (
|
prompt = (
|
||||||
build_prompt_with_rag(document)
|
build_prompt_with_rag(document, user)
|
||||||
if ai_config.llm_embedding_backend
|
if ai_config.llm_embedding_backend
|
||||||
else build_prompt_without_rag(document)
|
else build_prompt_without_rag(document)
|
||||||
)
|
)
|
||||||
|
@ -206,12 +206,32 @@ def llm_index_remove_document(document: Document):
|
|||||||
index.storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
|
index.storage_context.persist(persist_dir=settings.LLM_INDEX_DIR)
|
||||||
|
|
||||||
|
|
||||||
def query_similar_documents(document: Document, top_k: int = 5) -> list[Document]:
|
def query_similar_documents(
|
||||||
|
document: Document,
|
||||||
|
top_k: int = 5,
|
||||||
|
document_ids: list[int] | None = None,
|
||||||
|
) -> list[Document]:
|
||||||
"""
|
"""
|
||||||
Runs a similarity query and returns top-k similar Document objects.
|
Runs a similarity query and returns top-k similar Document objects.
|
||||||
"""
|
"""
|
||||||
index = load_or_build_index()
|
index = load_or_build_index()
|
||||||
retriever = VectorIndexRetriever(index=index, similarity_top_k=top_k)
|
|
||||||
|
# constrain only the node(s) that match the document IDs, if given
|
||||||
|
doc_node_ids = (
|
||||||
|
[
|
||||||
|
node.node_id
|
||||||
|
for node in index.docstore.docs.values()
|
||||||
|
if node.metadata.get("document_id") in document_ids
|
||||||
|
]
|
||||||
|
if document_ids
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
|
||||||
|
retriever = VectorIndexRetriever(
|
||||||
|
index=index,
|
||||||
|
similarity_top_k=top_k,
|
||||||
|
doc_ids=doc_node_ids,
|
||||||
|
)
|
||||||
|
|
||||||
query_text = (document.title or "") + "\n" + (document.content or "")
|
query_text = (document.title or "") + "\n" + (document.content or "")
|
||||||
results = retriever.retrieve(query_text)
|
results = retriever.retrieve(query_text)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user