mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-10-25 15:52:35 -04:00 
			
		
		
		
	some initial attempts to merge search and document list
This commit is contained in:
		
							parent
							
								
									6d20fc14ab
								
							
						
					
					
						commit
						f9263ddb62
					
				| @ -2,6 +2,7 @@ import logging | |||||||
| import os | import os | ||||||
| from contextlib import contextmanager | from contextlib import contextmanager | ||||||
| 
 | 
 | ||||||
|  | import math | ||||||
| from django.conf import settings | from django.conf import settings | ||||||
| from whoosh import highlight, classify, query | from whoosh import highlight, classify, query | ||||||
| from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME | from whoosh.fields import Schema, TEXT, NUMERIC, KEYWORD, DATETIME | ||||||
| @ -9,8 +10,10 @@ from whoosh.highlight import Formatter, get_text | |||||||
| from whoosh.index import create_in, exists_in, open_dir | from whoosh.index import create_in, exists_in, open_dir | ||||||
| from whoosh.qparser import MultifieldParser | from whoosh.qparser import MultifieldParser | ||||||
| from whoosh.qparser.dateparse import DateParserPlugin | from whoosh.qparser.dateparse import DateParserPlugin | ||||||
|  | from whoosh.searching import ResultsPage | ||||||
| from whoosh.writing import AsyncWriter | from whoosh.writing import AsyncWriter | ||||||
| 
 | 
 | ||||||
|  | from documents.models import Document | ||||||
| 
 | 
 | ||||||
| logger = logging.getLogger("paperless.index") | logger = logging.getLogger("paperless.index") | ||||||
| 
 | 
 | ||||||
| @ -66,6 +69,7 @@ def get_schema(): | |||||||
|         title=TEXT(stored=True), |         title=TEXT(stored=True), | ||||||
|         content=TEXT(), |         content=TEXT(), | ||||||
|         correspondent=TEXT(stored=True), |         correspondent=TEXT(stored=True), | ||||||
|  |         correspondent_id=NUMERIC(stored=True, numtype=int), | ||||||
|         tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True), |         tag=KEYWORD(stored=True, commas=True, scorable=True, lowercase=True), | ||||||
|         type=TEXT(stored=True), |         type=TEXT(stored=True), | ||||||
|         created=DATETIME(stored=True, sortable=True), |         created=DATETIME(stored=True, sortable=True), | ||||||
| @ -109,6 +113,7 @@ def update_document(writer, doc): | |||||||
|         title=doc.title, |         title=doc.title, | ||||||
|         content=doc.content, |         content=doc.content, | ||||||
|         correspondent=doc.correspondent.name if doc.correspondent else None, |         correspondent=doc.correspondent.name if doc.correspondent else None, | ||||||
|  |         correspondent_id=doc.correspondent.id if doc.correspondent else None, | ||||||
|         tag=tags if tags else None, |         tag=tags if tags else None, | ||||||
|         type=doc.document_type.name if doc.document_type else None, |         type=doc.document_type.name if doc.document_type else None, | ||||||
|         created=doc.created, |         created=doc.created, | ||||||
| @ -181,6 +186,65 @@ def query_page(ix, page, querystring, more_like_doc_id, more_like_doc_content): | |||||||
|         searcher.close() |         searcher.close() | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
class DelayedQuery:
    """Lazily evaluated Whoosh search that can stand in for a queryset.

    Only ``len()`` and slicing are implemented. Each of them runs a search
    on the searcher handed to the constructor, so that searcher has to stay
    open for as long as this object is in use.

    The request parameters select the query:

    * ``query``: a user query string parsed against the content, title,
      correspondent, tag and type fields.
    * ``more_like_id``: the id of a document; the query is built from the
      key terms of that document's content.

    ``correspondent__id``, when present, additionally restricts the hits
    to the ``correspondent_id`` index field.
    """

    # Request parameter -> index field it is matched against as a filter.
    _FILTER_FIELDS = {'correspondent__id': 'correspondent_id'}

    def __init__(self, ix, searcher, query_params, page_size):
        """Store the collaborators; no search is executed here.

        :param ix: index object; only its ``schema`` is read, for parsing.
        :param searcher: open searcher used to run every search.
        :param query_params: mapping of request parameters (see class doc).
        :param page_size: number of hits per page; slice starts are
            converted to page numbers with it.
        """
        self.ix = ix
        self.searcher = searcher
        self.query_params = query_params
        self.page_size = page_size

    @property
    def _query(self):
        """Build the query object described by the request parameters.

        :raises ValueError: if neither ``query`` nor ``more_like_id`` is
            among the parameters.
        """
        if 'query' in self.query_params:
            parser = MultifieldParser(
                ["content", "title", "correspondent", "tag", "type"],
                self.ix.schema)
            parser.add_plugin(DateParserPlugin())
            return parser.parse(self.query_params['query'])

        if 'more_like_id' in self.query_params:
            more_like_doc_id = int(self.query_params['more_like_id'])
            content = Document.objects.get(id=more_like_doc_id).content

            key_terms = self.searcher.key_terms_from_text(
                'content', content, numterms=20,
                model=classify.Bo1Model, normalize=False)
            # One weighted term per key term; a hit on any of them counts.
            return query.Or(
                [query.Term('content', word, boost=weight)
                 for word, weight in key_terms])

        raise ValueError(
            "Either query or more_like_id is required."
        )

    @property
    def _query_filter(self):
        """Return the filter query for the supported parameters, or None."""
        criteria = [
            query.Term(field, self.query_params[param])
            for param, field in self._FILTER_FIELDS.items()
            if param in self.query_params
        ]
        return query.And(criteria) if criteria else None

    def __len__(self):
        """Return the length of the search results for the query.

        ``limit=1`` keeps the search cheap: only the count is wanted here.
        Per the Whoosh documentation the length of a results object is the
        total number of matching documents, not the number fetched.
        """
        results = self.searcher.search(
            self._query, limit=1, filter=self._query_filter)
        return len(results)

    def __getitem__(self, item):
        """Return the results page that contains the slice's start offset.

        Only the start of the slice is used; a missing start means the
        first page. The stop and step are ignored and the returned page
        always spans ``page_size`` hits.

        :param item: a ``slice`` such as the one a paginator requests.
        :raises TypeError: if ``item`` is not a slice.
        """
        if not isinstance(item, slice):
            raise TypeError(
                "DelayedQuery only supports slicing, not indexing with "
                + type(item).__name__
            )
        start = item.start or 0
        return self.searcher.search_page(
            self._query,
            filter=self._query_filter,
            # Page numbers are 1-based.
            pagenum=start // self.page_size + 1,
            pagelen=self.page_size
        )
|  | 
 | ||||||
|  | 
 | ||||||
| def autocomplete(ix, term, limit=10): | def autocomplete(ix, term, limit=10): | ||||||
|     with ix.reader() as reader: |     with ix.reader() as reader: | ||||||
|         terms = [] |         terms = [] | ||||||
|  | |||||||
| @ -35,6 +35,7 @@ from rest_framework.viewsets import ( | |||||||
| 
 | 
 | ||||||
| from paperless.db import GnuPG | from paperless.db import GnuPG | ||||||
| from paperless.views import StandardPagination | from paperless.views import StandardPagination | ||||||
|  | from . import index | ||||||
| from .bulk_download import OriginalAndArchiveStrategy, OriginalsOnlyStrategy, \ | from .bulk_download import OriginalAndArchiveStrategy, OriginalsOnlyStrategy, \ | ||||||
|     ArchiveOnlyStrategy |     ArchiveOnlyStrategy | ||||||
| from .classifier import load_classifier | from .classifier import load_classifier | ||||||
| @ -326,6 +327,45 @@ class DocumentViewSet(RetrieveModelMixin, | |||||||
|             raise Http404() |             raise Http404() | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
class SearchResultSerializer(DocumentSerializer):
    """Serialize a search hit as the document it refers to."""

    def to_representation(self, instance):
        """Look up the hit's document and serialize it like any document.

        ``instance`` is a search hit; only its ``'id'`` entry is read.

        NOTE: an earlier draft also attached a ``__search_hit__`` entry
        carrying ``instance.score`` to the output; that is not done here.
        """
        document = Document.objects.get(id=instance['id'])
        return super().to_representation(document)
|  | 
 | ||||||
|  | 
 | ||||||
class UnifiedSearchViewSet(DocumentViewSet):
    """Document view set whose list route doubles as the search endpoint.

    A list request carrying ``query`` or ``more_like_id`` is answered from
    the search index through ``index.DelayedQuery``; every other request
    is handled exactly as the parent view set handles it.
    """

    # Searcher opened by ``list`` for the search request being processed,
    # ``None`` at all other times. The search-specific code paths key off
    # this attribute, so a request that merely carries a ``query``
    # parameter on another route never reaches for a missing searcher.
    searcher = None
    # Index that ``searcher`` was opened from.
    _search_index = None

    def _is_search_request(self):
        """Tell whether the request carries a parameter DelayedQuery uses."""
        params = self.request.query_params
        return "query" in params or "more_like_id" in params

    def _search_in_progress(self):
        """Tell whether ``list`` is currently serving a search request."""
        return self.searcher is not None

    def get_serializer_class(self):
        """Use the search-hit serializer only while a search is served."""
        if self._search_in_progress():
            return SearchResultSerializer
        return DocumentSerializer

    def filter_queryset(self, queryset):
        """Swap the queryset for a lazy index search while searching.

        Outside of a search the parent's filtering applies unchanged.
        """
        if not self._search_in_progress():
            return super().filter_queryset(queryset)

        # Ask the paginator for the page size of THIS request so the page
        # arithmetic in DelayedQuery matches the slice the paginator takes.
        # NOTE(review): assumes the paginator is a DRF PageNumberPagination
        # (it already had to expose ``page_size``) - confirm.
        page_size = self.paginator.get_page_size(self.request)
        return index.DelayedQuery(
            self._search_index,
            self.searcher,
            self.request.query_params,
            page_size)

    def list(self, request, *args, **kwargs):
        """List documents, keeping a searcher open for search requests.

        The parent's ``list`` runs inside the ``with`` block because the
        search results it serializes are read from the open searcher.
        """
        if not self._is_search_request():
            return super().list(request, *args, **kwargs)

        ix = index.open_index()
        with ix.searcher() as s:
            self.searcher = s
            self._search_index = ix
            try:
                return super().list(request, *args, **kwargs)
            finally:
                # Never leave a closed searcher behind on the instance.
                self.searcher = None
                self._search_index = None
|  | 
 | ||||||
| class LogViewSet(ViewSet): | class LogViewSet(ViewSet): | ||||||
| 
 | 
 | ||||||
|     permission_classes = (IsAuthenticated,) |     permission_classes = (IsAuthenticated,) | ||||||
|  | |||||||
| @ -12,7 +12,7 @@ from django.utils.translation import gettext_lazy as _ | |||||||
| from paperless.consumers import StatusConsumer | from paperless.consumers import StatusConsumer | ||||||
| from documents.views import ( | from documents.views import ( | ||||||
|     CorrespondentViewSet, |     CorrespondentViewSet, | ||||||
|     DocumentViewSet, |     UnifiedSearchViewSet, | ||||||
|     LogViewSet, |     LogViewSet, | ||||||
|     TagViewSet, |     TagViewSet, | ||||||
|     DocumentTypeViewSet, |     DocumentTypeViewSet, | ||||||
| @ -31,7 +31,7 @@ from paperless.views import FaviconView | |||||||
| api_router = DefaultRouter() | api_router = DefaultRouter() | ||||||
| api_router.register(r"correspondents", CorrespondentViewSet) | api_router.register(r"correspondents", CorrespondentViewSet) | ||||||
| api_router.register(r"document_types", DocumentTypeViewSet) | api_router.register(r"document_types", DocumentTypeViewSet) | ||||||
| api_router.register(r"documents", DocumentViewSet) | api_router.register(r"documents", UnifiedSearchViewSet) | ||||||
| api_router.register(r"logs", LogViewSet, basename="logs") | api_router.register(r"logs", LogViewSet, basename="logs") | ||||||
| api_router.register(r"tags", TagViewSet) | api_router.register(r"tags", TagViewSet) | ||||||
| api_router.register(r"saved_views", SavedViewViewSet) | api_router.register(r"saved_views", SavedViewViewSet) | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user