mirror of
				https://github.com/paperless-ngx/paperless-ngx.git
				synced 2025-11-03 19:17:13 -05:00 
			
		
		
		
	explicitly add txt, md, and csv types for consumer and viewer; fix thumbnail generation
This commit is contained in:
		
							parent
							
								
									d6fedbec52
								
							
						
					
					
						commit
						4849249d86
					
				@ -188,7 +188,11 @@ class Document(models.Model):
 | 
			
		||||
    TYPE_JPG = "jpg"
 | 
			
		||||
    TYPE_GIF = "gif"
 | 
			
		||||
    TYPE_TIF = "tiff"
 | 
			
		||||
    TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,)
 | 
			
		||||
    TYPE_TXT = "txt"
 | 
			
		||||
    TYPE_CSV = "csv"
 | 
			
		||||
    TYPE_MD  = "md"
 | 
			
		||||
    TYPES = (TYPE_PDF, TYPE_PNG, TYPE_JPG, TYPE_GIF, TYPE_TIF,
 | 
			
		||||
             TYPE_TXT, TYPE_CSV, TYPE_MD)
 | 
			
		||||
 | 
			
		||||
    STORAGE_TYPE_UNENCRYPTED = "unencrypted"
 | 
			
		||||
    STORAGE_TYPE_GPG = "gpg"
 | 
			
		||||
@ -361,51 +365,52 @@ class FileInfo:
 | 
			
		||||
        )
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    formats = "pdf|jpe?g|png|gif|tiff?|te?xt|md|csv"
 | 
			
		||||
    REGEXES = OrderedDict([
 | 
			
		||||
        ("created-correspondent-title-tags", re.compile(
 | 
			
		||||
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
 | 
			
		||||
            r"(?P<correspondent>.*) - "
 | 
			
		||||
            r"(?P<title>.*) - "
 | 
			
		||||
            r"(?P<tags>[a-z0-9\-,]*)"
 | 
			
		||||
            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
 | 
			
		||||
            r"\.(?P<extension>{})$".format(formats),
 | 
			
		||||
            flags=re.IGNORECASE
 | 
			
		||||
        )),
 | 
			
		||||
        ("created-title-tags", re.compile(
 | 
			
		||||
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
 | 
			
		||||
            r"(?P<title>.*) - "
 | 
			
		||||
            r"(?P<tags>[a-z0-9\-,]*)"
 | 
			
		||||
            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
 | 
			
		||||
            r"\.(?P<extension>{})$".format(formats),
 | 
			
		||||
            flags=re.IGNORECASE
 | 
			
		||||
        )),
 | 
			
		||||
        ("created-correspondent-title", re.compile(
 | 
			
		||||
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
 | 
			
		||||
            r"(?P<correspondent>.*) - "
 | 
			
		||||
            r"(?P<title>.*)"
 | 
			
		||||
            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
 | 
			
		||||
            r"\.(?P<extension>{})$".format(formats),
 | 
			
		||||
            flags=re.IGNORECASE
 | 
			
		||||
        )),
 | 
			
		||||
        ("created-title", re.compile(
 | 
			
		||||
            r"^(?P<created>\d\d\d\d\d\d\d\d(\d\d\d\d\d\d)?Z) - "
 | 
			
		||||
            r"(?P<title>.*)"
 | 
			
		||||
            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
 | 
			
		||||
            r"\.(?P<extension>{})$".format(formats),
 | 
			
		||||
            flags=re.IGNORECASE
 | 
			
		||||
        )),
 | 
			
		||||
        ("correspondent-title-tags", re.compile(
 | 
			
		||||
            r"(?P<correspondent>.*) - "
 | 
			
		||||
            r"(?P<title>.*) - "
 | 
			
		||||
            r"(?P<tags>[a-z0-9\-,]*)"
 | 
			
		||||
            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
 | 
			
		||||
            r"\.(?P<extension>{})$".format(formats),
 | 
			
		||||
            flags=re.IGNORECASE
 | 
			
		||||
        )),
 | 
			
		||||
        ("correspondent-title", re.compile(
 | 
			
		||||
            r"(?P<correspondent>.*) - "
 | 
			
		||||
            r"(?P<title>.*)?"
 | 
			
		||||
            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
 | 
			
		||||
            r"\.(?P<extension>{})$".format(formats),
 | 
			
		||||
            flags=re.IGNORECASE
 | 
			
		||||
        )),
 | 
			
		||||
        ("title", re.compile(
 | 
			
		||||
            r"(?P<title>.*)"
 | 
			
		||||
            r"\.(?P<extension>pdf|jpe?g|png|gif|tiff?)$",
 | 
			
		||||
            r"\.(?P<extension>{})$".format(formats),
 | 
			
		||||
            flags=re.IGNORECASE
 | 
			
		||||
        ))
 | 
			
		||||
    ])
 | 
			
		||||
 | 
			
		||||
@ -48,6 +48,9 @@ class FetchView(SessionOrBasicAuthMixin, DetailView):
 | 
			
		||||
            Document.TYPE_JPG: "image/jpeg",
 | 
			
		||||
            Document.TYPE_GIF: "image/gif",
 | 
			
		||||
            Document.TYPE_TIF: "image/tiff",
 | 
			
		||||
            Document.TYPE_CSV: "text/csv",
 | 
			
		||||
            Document.TYPE_MD:  "text/markdown",
 | 
			
		||||
            Document.TYPE_TXT: "text/plain"
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if self.kwargs["kind"] == "thumb":
 | 
			
		||||
 | 
			
		||||
@ -10,7 +10,7 @@ from documents.parsers import DocumentParser, ParseError
 | 
			
		||||
 | 
			
		||||
class TextDocumentParser(DocumentParser):
 | 
			
		||||
    """
 | 
			
		||||
    This parser directly parses a text document (.txt or .md)
 | 
			
		||||
    This parser directly parses a text document (.txt, .md, or .csv)
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -30,18 +30,50 @@ class TextDocumentParser(DocumentParser):
 | 
			
		||||
        The thumbnail of a txt is just a 500px wide image of the text
 | 
			
		||||
        rendered onto a letter-sized page.
 | 
			
		||||
        """
 | 
			
		||||
        # The below is heavily cribbed from https://askubuntu.com/a/590951
 | 
			
		||||
 | 
			
		||||
        run_convert(
 | 
			
		||||
            self.CONVERT,
 | 
			
		||||
            "-size", "500x647",
 | 
			
		||||
            "xc:white",
 | 
			
		||||
        bg_color = "white"  # bg color
 | 
			
		||||
        text_color = "black"  # text color
 | 
			
		||||
        psize = [500, 647]  # icon size
 | 
			
		||||
        n_lines = 50  # number of lines to show
 | 
			
		||||
        output_file = os.path.join(self.tempdir, "convert-txt.png")
 | 
			
		||||
 | 
			
		||||
        temp_bg = os.path.join(self.tempdir, "bg.png")
 | 
			
		||||
        temp_txlayer = os.path.join(self.tempdir, "tx.png")
 | 
			
		||||
        picsize = "x".join([str(n) for n in psize])
 | 
			
		||||
        txsize = "x".join([str(n - 8) for n in psize])
 | 
			
		||||
 | 
			
		||||
        def create_bg():
 | 
			
		||||
            work_size = ",".join([str(n - 1) for n in psize])
 | 
			
		||||
            r = str(round(psize[0] / 10));
 | 
			
		||||
            rounded = ",".join([r, r])
 | 
			
		||||
            run_command(self.CONVERT, "-size ", picsize, ' xc:none -draw ',
 | 
			
		||||
                        '"fill ', bg_color, ' roundrectangle 0,0,',
 | 
			
		||||
                        work_size, ",", rounded, '" ', temp_bg)
 | 
			
		||||
 | 
			
		||||
        def read_text():
 | 
			
		||||
            with open(self.document_path, 'r') as src:
 | 
			
		||||
                lines = [l.strip() for l in src.readlines()]
 | 
			
		||||
                text = "\n".join([l for l in lines[:n_lines]])
 | 
			
		||||
                return text.replace('"', "'")
 | 
			
		||||
 | 
			
		||||
        def create_txlayer():
 | 
			
		||||
            run_command(self.CONVERT,
 | 
			
		||||
                        "-background none",
 | 
			
		||||
                        "-fill",
 | 
			
		||||
                        text_color,
 | 
			
		||||
                        "-pointsize", "12",
 | 
			
		||||
            "-fill", "black",
 | 
			
		||||
            "-draw", "\"text 0,12 \'$(cat {})\'\"".format(self.document_path),
 | 
			
		||||
            os.path.join(self.tempdir, "convert-txt.png")
 | 
			
		||||
        )
 | 
			
		||||
                        "-border 4 -bordercolor none",
 | 
			
		||||
                        "-size ", txsize,
 | 
			
		||||
                        ' caption:"', read_text(), '" ',
 | 
			
		||||
                        temp_txlayer)
 | 
			
		||||
 | 
			
		||||
        return os.path.join(self.tempdir, "convert-txt.png")
 | 
			
		||||
        create_txlayer()
 | 
			
		||||
        create_bg()
 | 
			
		||||
        run_command(self.CONVERT, temp_bg, temp_txlayer,
 | 
			
		||||
                    "-background None -layers merge ", output_file)
 | 
			
		||||
 | 
			
		||||
        return output_file
 | 
			
		||||
 | 
			
		||||
    def get_text(self):
 | 
			
		||||
 | 
			
		||||
@ -102,12 +134,13 @@ class TextDocumentParser(DocumentParser):
 | 
			
		||||
        return date
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def run_convert(*args):
 | 
			
		||||
def run_command(*args):
 | 
			
		||||
    environment = os.environ.copy()
 | 
			
		||||
    if settings.CONVERT_MEMORY_LIMIT:
 | 
			
		||||
        environment["MAGICK_MEMORY_LIMIT"] = settings.CONVERT_MEMORY_LIMIT
 | 
			
		||||
    if settings.CONVERT_TMPDIR:
 | 
			
		||||
        environment["MAGICK_TMPDIR"] = settings.CONVERT_TMPDIR
 | 
			
		||||
 | 
			
		||||
    if not subprocess.Popen(args, env=environment).wait() == 0:
 | 
			
		||||
    if not subprocess.Popen(' '.join(args), env=environment,
 | 
			
		||||
                            shell=True).wait() == 0:
 | 
			
		||||
        raise ParseError("Convert failed at {}".format(args))
 | 
			
		||||
@ -5,7 +5,7 @@ from .parsers import TextDocumentParser
 | 
			
		||||
 | 
			
		||||
class ConsumerDeclaration:
 | 
			
		||||
 | 
			
		||||
    MATCHING_FILES = re.compile("^.*\.(txt|md)$")
 | 
			
		||||
    MATCHING_FILES = re.compile("^.*\.(te?xt|md|csv)$")
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def handle(cls, sender, **kwargs):
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user