mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 19:17:13 -05:00 
			
		
		
		
	add config options and documentation
Signed-off-by: florian on nixos (Florian Brandes) <florian.brandes@posteo.de>
This commit is contained in:
		
							parent
							
								
									37b3fde4e1
								
							
						
					
					
						commit
						c024b846c3
					
				@ -588,6 +588,27 @@ PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=<bool>
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
    Defaults to false.
 | 
					    Defaults to false.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					PAPERLESS_CONSUMER_ENABLE_BARCODES=<bool>
 | 
				
			||||||
 | 
					    Enables the scanning and page separation based on detected barcodes.
 | 
				
			||||||
 | 
					    This allows for scanning and adding multiple documents per uploaded
 | 
				
			||||||
 | 
					    file, which are separated by one or multiple barcode pages.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    For ease of use, it is suggested to use a standardized separation page,
 | 
				
			||||||
 | 
					    e.g. `here <https://www.alliancegroup.co.uk/patch-codes.htm>`_.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    If no barcodes are detected in the uploaded file, no page separation
 | 
				
			||||||
 | 
					    will happen.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Defaults to true.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
 | 
				
			||||||
 | 
					  Defines the string to be detected as a separator barcode.
 | 
				
			||||||
 | 
					  If paperless is used with the PATCH-T separator pages, users
 | 
				
			||||||
 | 
					  shouldn't change this.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  Defaults to "PATCHT".
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PAPERLESS_CONVERT_MEMORY_LIMIT=<num>
 | 
					PAPERLESS_CONVERT_MEMORY_LIMIT=<num>
 | 
				
			||||||
    On smaller systems, or even in the case of Very Large Documents, the consumer
 | 
					    On smaller systems, or even in the case of Very Large Documents, the consumer
 | 
				
			||||||
 | 
				
			|||||||
@ -60,6 +60,8 @@
 | 
				
			|||||||
#PAPERLESS_CONSUMER_RECURSIVE=false
 | 
					#PAPERLESS_CONSUMER_RECURSIVE=false
 | 
				
			||||||
#PAPERLESS_CONSUMER_IGNORE_PATTERNS=[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]
 | 
					#PAPERLESS_CONSUMER_IGNORE_PATTERNS=[".DS_STORE/*", "._*", ".stfolder/*", ".stversions/*", ".localized/*", "desktop.ini"]
 | 
				
			||||||
#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
 | 
					#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=false
 | 
				
			||||||
 | 
					#PAPERLESS_CONSUMER_ENABLE_BARCODES=true
 | 
				
			||||||
 | 
					#PAPERLESS_CONSUMER_BARCODE_STRING=PATCHT
 | 
				
			||||||
#PAPERLESS_OPTIMIZE_THUMBNAILS=true
 | 
					#PAPERLESS_OPTIMIZE_THUMBNAILS=true
 | 
				
			||||||
#PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 | 
					#PAPERLESS_PRE_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 | 
				
			||||||
#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 | 
					#PAPERLESS_POST_CONSUME_SCRIPT=/path/to/an/arbitrary/script.sh
 | 
				
			||||||
 | 
				
			|||||||
@ -24,8 +24,6 @@ from pikepdf import Pdf
 | 
				
			|||||||
from pyzbar import pyzbar
 | 
					from pyzbar import pyzbar
 | 
				
			||||||
from whoosh.writing import AsyncWriter
 | 
					from whoosh.writing import AsyncWriter
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# barcode decoder
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
logger = logging.getLogger("paperless.tasks")
 | 
					logger = logging.getLogger("paperless.tasks")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -100,12 +98,13 @@ def scan_file_for_separating_barcodes(filepath: str) -> list:
 | 
				
			|||||||
    Returns a list of pagenumbers, which separate the file
 | 
					    Returns a list of pagenumbers, which separate the file
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    separator_page_numbers = []
 | 
					    separator_page_numbers = []
 | 
				
			||||||
 | 
					    separator_barcode = "b'" + str(settings.CONSUMER_BARCODE_STRING) + "'"
 | 
				
			||||||
    # use a temporary directory in case the file is too big to handle in memory
 | 
					    # use a temporary directory in case the file is too big to handle in memory
 | 
				
			||||||
    with tempfile.TemporaryDirectory() as path:
 | 
					    with tempfile.TemporaryDirectory() as path:
 | 
				
			||||||
        pages_from_path = convert_from_path(filepath, output_folder=path)
 | 
					        pages_from_path = convert_from_path(filepath, output_folder=path)
 | 
				
			||||||
        for current_page_number, page in enumerate(pages_from_path):
 | 
					        for current_page_number, page in enumerate(pages_from_path):
 | 
				
			||||||
            current_barcodes = barcode_reader(page)
 | 
					            current_barcodes = barcode_reader(page)
 | 
				
			||||||
            if "b'PATCHT'" in current_barcodes:
 | 
					            if separator_barcode in current_barcodes:
 | 
				
			||||||
                separator_page_numbers = separator_page_numbers + [current_page_number]
 | 
					                separator_page_numbers = separator_page_numbers + [current_page_number]
 | 
				
			||||||
    return separator_page_numbers
 | 
					    return separator_page_numbers
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@ -163,13 +162,12 @@ def save_to_dir(filepath, newname=None, target_dir=settings.CONSUMPTION_DIR):
 | 
				
			|||||||
    Copies filepath to target_dir.
 | 
					    Copies filepath to target_dir.
 | 
				
			||||||
    Optionally rename the file.
 | 
					    Optionally rename the file.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    logger.debug(f"filepath: {str(filepath)}")
 | 
					 | 
				
			||||||
    logger.debug(f"newname: {str(newname)}")
 | 
					 | 
				
			||||||
    logger.debug(f"target_dir: {str(target_dir)}")
 | 
					 | 
				
			||||||
    if os.path.isfile(filepath) and os.path.isdir(target_dir):
 | 
					    if os.path.isfile(filepath) and os.path.isdir(target_dir):
 | 
				
			||||||
        dst = shutil.copy(filepath, target_dir)
 | 
					        dst = shutil.copy(filepath, target_dir)
 | 
				
			||||||
 | 
					        logging.debug(f"saved {str(filepath)} to {str(dst)}")
 | 
				
			||||||
        if newname:
 | 
					        if newname:
 | 
				
			||||||
            dst_new = os.path.join(target_dir, newname)
 | 
					            dst_new = os.path.join(target_dir, newname)
 | 
				
			||||||
 | 
					            logger.debug(f"moving {str(dst)} to {str(dst_new)}")
 | 
				
			||||||
            os.rename(dst, dst_new)
 | 
					            os.rename(dst, dst_new)
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
 | 
					        logger.warning(f"{str(filepath)} or {str(target_dir)} don't exist.")
 | 
				
			||||||
@ -186,7 +184,9 @@ def consume_file(
 | 
				
			|||||||
):
 | 
					):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # check for separators in current document
 | 
					    # check for separators in current document
 | 
				
			||||||
    separators = scan_file_for_separating_barcodes(path)
 | 
					    separators = []
 | 
				
			||||||
 | 
					    if settings.CONSUMER_ENABLE_BARCODES:
 | 
				
			||||||
 | 
					        separators = scan_file_for_separating_barcodes(path)
 | 
				
			||||||
    document_list = []
 | 
					    document_list = []
 | 
				
			||||||
    if separators == []:
 | 
					    if separators == []:
 | 
				
			||||||
        pass
 | 
					        pass
 | 
				
			||||||
 | 
				
			|||||||
@ -98,7 +98,8 @@ class TestTasks(DirectoriesMixin, TestCase):
 | 
				
			|||||||
            "patch-code-t.pbm",
 | 
					            "patch-code-t.pbm",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        img = Image.open(test_file)
 | 
					        img = Image.open(test_file)
 | 
				
			||||||
        self.assertEqual(tasks.barcode_reader(img), ["b'PATCHT'"])
 | 
					        separator_barcode = "b'" + str(settings.CONSUMER_BARCODE_STRING) + "'"
 | 
				
			||||||
 | 
					        self.assertEqual(tasks.barcode_reader(img), [separator_barcode])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_barcode_reader2(self):
 | 
					    def test_barcode_reader2(self):
 | 
				
			||||||
        test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png")
 | 
					        test_file = os.path.join(os.path.dirname(__file__), "samples", "simple.png")
 | 
				
			||||||
 | 
				
			|||||||
@ -462,6 +462,13 @@ CONSUMER_IGNORE_PATTERNS = list(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
 | 
					CONSUMER_SUBDIRS_AS_TAGS = __get_boolean("PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CONSUMER_ENABLE_BARCODES = __get_boolean(
 | 
				
			||||||
 | 
					    "PAPERLESS_CONSUMER_ENABLE_BARCODES",
 | 
				
			||||||
 | 
					    default="YES",
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CONSUMER_BARCODE_STRING = os.getenv("PAPERLESS_CONSUMER_BARCODE_STRING", "PATCHT")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
 | 
					OPTIMIZE_THUMBNAILS = __get_boolean("PAPERLESS_OPTIMIZE_THUMBNAILS", "true")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
 | 
					OCR_PAGES = int(os.getenv("PAPERLESS_OCR_PAGES", 0))
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user