mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 19:17:13 -05:00 
			
		
		
		
	Fix: Convert search dates to UTC in advanced search (#4891)
* Index documents using local timezone * Add local date parser
This commit is contained in:
		
							parent
							
								
									fbf1a051a2
								
							
						
					
					
						commit
						af0817ab74
					
				@ -25,9 +25,11 @@ from whoosh.index import open_dir
 | 
				
			|||||||
from whoosh.qparser import MultifieldParser
 | 
					from whoosh.qparser import MultifieldParser
 | 
				
			||||||
from whoosh.qparser import QueryParser
 | 
					from whoosh.qparser import QueryParser
 | 
				
			||||||
from whoosh.qparser.dateparse import DateParserPlugin
 | 
					from whoosh.qparser.dateparse import DateParserPlugin
 | 
				
			||||||
 | 
					from whoosh.qparser.dateparse import English
 | 
				
			||||||
from whoosh.scoring import TF_IDF
 | 
					from whoosh.scoring import TF_IDF
 | 
				
			||||||
from whoosh.searching import ResultsPage
 | 
					from whoosh.searching import ResultsPage
 | 
				
			||||||
from whoosh.searching import Searcher
 | 
					from whoosh.searching import Searcher
 | 
				
			||||||
 | 
					from whoosh.util.times import timespan
 | 
				
			||||||
from whoosh.writing import AsyncWriter
 | 
					from whoosh.writing import AsyncWriter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# from documents.models import CustomMetadata
 | 
					# from documents.models import CustomMetadata
 | 
				
			||||||
@ -356,6 +358,22 @@ class DelayedQuery:
 | 
				
			|||||||
        return page
 | 
					        return page
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class LocalDateParser(English):
 | 
				
			||||||
 | 
					    def reverse_timezone_offset(self, d):
 | 
				
			||||||
 | 
					        return (d.replace(tzinfo=timezone.get_current_timezone())).astimezone(
 | 
				
			||||||
 | 
					            timezone.utc,
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def date_from(self, *args, **kwargs):
 | 
				
			||||||
 | 
					        d = super().date_from(*args, **kwargs)
 | 
				
			||||||
 | 
					        if isinstance(d, timespan):
 | 
				
			||||||
 | 
					            d.start = self.reverse_timezone_offset(d.start)
 | 
				
			||||||
 | 
					            d.end = self.reverse_timezone_offset(d.end)
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            d = self.reverse_timezone_offset(d)
 | 
				
			||||||
 | 
					        return d
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class DelayedFullTextQuery(DelayedQuery):
 | 
					class DelayedFullTextQuery(DelayedQuery):
 | 
				
			||||||
    def _get_query(self):
 | 
					    def _get_query(self):
 | 
				
			||||||
        q_str = self.query_params["query"]
 | 
					        q_str = self.query_params["query"]
 | 
				
			||||||
@ -371,7 +389,12 @@ class DelayedFullTextQuery(DelayedQuery):
 | 
				
			|||||||
            ],
 | 
					            ],
 | 
				
			||||||
            self.searcher.ixreader.schema,
 | 
					            self.searcher.ixreader.schema,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        qp.add_plugin(DateParserPlugin(basedate=timezone.now()))
 | 
					        qp.add_plugin(
 | 
				
			||||||
 | 
					            DateParserPlugin(
 | 
				
			||||||
 | 
					                basedate=timezone.now(),
 | 
				
			||||||
 | 
					                dateparser=LocalDateParser(),
 | 
				
			||||||
 | 
					            ),
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
        q = qp.parse(q_str)
 | 
					        q = qp.parse(q_str)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        corrected = self.searcher.correct_query(q, q_str)
 | 
					        corrected = self.searcher.correct_query(q, q_str)
 | 
				
			||||||
 | 
				
			|||||||
@ -964,6 +964,62 @@ class TestDocumentApi(DirectoriesMixin, DocumentConsumeDelayMixin, APITestCase):
 | 
				
			|||||||
            # Assert subset in results
 | 
					            # Assert subset in results
 | 
				
			||||||
            self.assertDictEqual(result, {**result, **subset})
 | 
					            self.assertDictEqual(result, {**result, **subset})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @override_settings(
 | 
				
			||||||
 | 
					        TIME_ZONE="Europe/Sofia",
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    def test_search_added_specific_date_with_timezone_ahead(self):
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        GIVEN:
 | 
				
			||||||
 | 
					            - Two documents added right now
 | 
				
			||||||
 | 
					            - One document added on a specific date
 | 
				
			||||||
 | 
					            - The timezone is ahead of UTC time (+2)
 | 
				
			||||||
 | 
					        WHEN:
 | 
				
			||||||
 | 
					            - Query for documents added on a specific date
 | 
				
			||||||
 | 
					        THEN:
 | 
				
			||||||
 | 
					            - The one document is returned
 | 
				
			||||||
 | 
					        """
 | 
				
			||||||
 | 
					        d1 = Document.objects.create(
 | 
				
			||||||
 | 
					            title="invoice",
 | 
				
			||||||
 | 
					            content="the thing i bought at a shop and paid with bank account",
 | 
				
			||||||
 | 
					            checksum="A",
 | 
				
			||||||
 | 
					            pk=1,
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        d2 = Document.objects.create(
 | 
				
			||||||
 | 
					            title="bank statement 1",
 | 
				
			||||||
 | 
					            content="things i paid for in august",
 | 
				
			||||||
 | 
					            pk=2,
 | 
				
			||||||
 | 
					            checksum="B",
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        d3 = Document.objects.create(
 | 
				
			||||||
 | 
					            title="bank statement 3",
 | 
				
			||||||
 | 
					            content="things i paid for in september",
 | 
				
			||||||
 | 
					            pk=3,
 | 
				
			||||||
 | 
					            checksum="C",
 | 
				
			||||||
 | 
					            # specific time zone aware date
 | 
				
			||||||
 | 
					            added=timezone.make_aware(datetime.datetime(2023, 12, 1)),
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        # refresh doc instance to ensure we operate on date objects that Django uses
 | 
				
			||||||
 | 
					        # Django converts dates to UTC
 | 
				
			||||||
 | 
					        d3.refresh_from_db()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        with index.open_index_writer() as writer:
 | 
				
			||||||
 | 
					            index.update_document(writer, d1)
 | 
				
			||||||
 | 
					            index.update_document(writer, d2)
 | 
				
			||||||
 | 
					            index.update_document(writer, d3)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        response = self.client.get("/api/documents/?query=added:20231201")
 | 
				
			||||||
 | 
					        results = response.data["results"]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        # Expect 1 document returned
 | 
				
			||||||
 | 
					        self.assertEqual(len(results), 1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for idx, subset in enumerate(
 | 
				
			||||||
 | 
					            [{"id": 3, "title": "bank statement 3"}],
 | 
				
			||||||
 | 
					        ):
 | 
				
			||||||
 | 
					            result = results[idx]
 | 
				
			||||||
 | 
					            # Assert subset in results
 | 
				
			||||||
 | 
					            self.assertDictEqual(result, {**result, **subset})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_search_added_in_last_month(self):
 | 
					    def test_search_added_in_last_month(self):
 | 
				
			||||||
        """
 | 
					        """
 | 
				
			||||||
        GIVEN:
 | 
					        GIVEN:
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user