mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 19:17:13 -05:00 
			
		
		
		
	Ensure the tika parse function gets a string, not a PathLike
This commit is contained in:
		
							parent
							
								
									17ae2aacbf
								
							
						
					
					
						commit
						d4cb84ff76
					
				@ -1,4 +1,5 @@
 | 
			
		||||
import os
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
 | 
			
		||||
import dateutil.parser
 | 
			
		||||
import requests
 | 
			
		||||
@ -28,6 +29,11 @@ class TikaDocumentParser(DocumentParser):
 | 
			
		||||
 | 
			
		||||
    def extract_metadata(self, document_path, mime_type):
 | 
			
		||||
        tika_server = settings.TIKA_ENDPOINT
 | 
			
		||||
 | 
			
		||||
        # tika does not support a PathLike, only strings
 | 
			
		||||
        # ensure this is a string
 | 
			
		||||
        document_path = str(document_path)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            parsed = parser.from_file(document_path, tika_server)
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
@ -47,10 +53,14 @@ class TikaDocumentParser(DocumentParser):
 | 
			
		||||
            for key in parsed["metadata"]
 | 
			
		||||
        ]
 | 
			
		||||
 | 
			
		||||
    def parse(self, document_path, mime_type, file_name=None):
 | 
			
		||||
    def parse(self, document_path: Path, mime_type, file_name=None):
 | 
			
		||||
        self.log("info", f"Sending {document_path} to Tika server")
 | 
			
		||||
        tika_server = settings.TIKA_ENDPOINT
 | 
			
		||||
 | 
			
		||||
        # tika does not support a PathLike, only strings
 | 
			
		||||
        # ensure this is a string
 | 
			
		||||
        document_path = str(document_path)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            parsed = parser.from_file(document_path, tika_server)
 | 
			
		||||
        except Exception as err:
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user