mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-02 18:47:10 -05:00 
			
		
		
		
	Added test for duplicates
This commit is contained in:
		
							parent
							
								
									2853545b9d
								
							
						
					
					
						commit
						64b72d4337
					
				@ -1,4 +1,5 @@
 | 
				
			|||||||
import datetime
 | 
					import datetime
 | 
				
			||||||
 | 
					import hashlib
 | 
				
			||||||
import logging
 | 
					import logging
 | 
				
			||||||
import tempfile
 | 
					import tempfile
 | 
				
			||||||
import uuid
 | 
					import uuid
 | 
				
			||||||
@ -101,6 +102,14 @@ class Consumer(object):
 | 
				
			|||||||
            if self._is_ready(doc):
 | 
					            if self._is_ready(doc):
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if self._is_duplicate(doc):
 | 
				
			||||||
 | 
					                self.log(
 | 
				
			||||||
 | 
					                    "info",
 | 
				
			||||||
 | 
					                    "Skipping {} as it appears to be a duplicate".format(doc)
 | 
				
			||||||
 | 
					                )
 | 
				
			||||||
 | 
					                self._ignore.append(doc)
 | 
				
			||||||
 | 
					                continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            self.logging_group = uuid.uuid4()
 | 
					            self.logging_group = uuid.uuid4()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            self.log("info", "Consuming {}".format(doc))
 | 
					            self.log("info", "Consuming {}".format(doc))
 | 
				
			||||||
@ -340,6 +349,12 @@ class Consumer(object):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        return False
 | 
					        return False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    @staticmethod
 | 
				
			||||||
 | 
					    def _is_duplicate(doc):
 | 
				
			||||||
 | 
					        with open(doc, "rb") as f:
 | 
				
			||||||
 | 
					            checksum = hashlib.md5(f.read()).hexdigest()
 | 
				
			||||||
 | 
					        return Document.objects.filter(checksum=checksum).exists()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def image_to_string(args):
 | 
					def image_to_string(args):
 | 
				
			||||||
    img, lang = args
 | 
					    img, lang = args
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user